For the following analyses we will require the use of a number of different R packages. We can use the following code to quickly load in the packages and install any packages not previously installed in the R console.
if (!require("pacman")) install.packages("pacman")
pacman::p_load_gh("pmartinezarbizu/pairwiseAdonis/pairwiseAdonis", "ropensci/rnaturalearthhires", "KarstensLab/microshades")
pacman::p_load("flextable", "cowplot", "car", "ggrepel", "ggspatial", "paletteer", "patchwork", "rnaturalearth", "sf", "Hmisc", "MCMC.OTU", "pairwiseAdonis", "RColorBrewer", "Redmonder", "lubridate", "officer", "adegenet", "dendextend", "gdata", "ggdendro", "hierfstat", "kableExtra", "poppr", "reshape2", "StAMPP", "vcfR", "vegan", "boa", "magick", "sdmpredictors", "ggcorrplot", "tidyverse", "TeachingDemos", "LaplacesDemon", "adespatial", "ggnewscale", "ggbeeswarm", "multcomp", "rstatix", "R.utils", "graph4lg", "ggstance", "ggtreeExtra", "ggtree", "multcompView")
options("scipen" = 10)
load("nwgom.RData")
Making color palettes to use throughout all plots
gomPal = c("#D53E4F", "#F88D51", "#FEE08B", "#66C2A5", "#3288BD")
boundPal = c("gray30", paletteer_d("vapoRwave::vapoRwave")[10])
pink = "#FF6A8BFF"
purple = paletteer_d("vapoRwave::vapoRwave")[10]
# mcavKColPal = c(colorRampPalette(colors = c("#1B9E77","#FFFFFF"))(3)[1:2], "azure3")
# sintKColPal = c(colorRampPalette(colors = c("#D95F02","#FFFFFF"))(3)[1:2], "azure3")
# ofavKColPal = colorRampPalette(colors = c("#7570B3","#FFFFFF"))(4)[1:3]
# xestoKColPal = c(colorRampPalette(colors = c("#E7298A","#FFFFFF"))(7)[c(1:6)], "azure3")
mcavKColPal = c(colorRampPalette(colors = c("#1B9E77","#FFFFFF"))(7)[c(1,5,2,3,4,6)], "azure3")
sintKColPal = c(colorRampPalette(colors = c("#D95F02","#FFFFFF"))(7)[c(1,5,2,3,4,6)], "azure3")
ofavKColPal = colorRampPalette(colors = c("#7570B3","#FFFFFF"))(7)[c(1,5,3,2,4,6)]
xestoKColPal = c(colorRampPalette(colors = c("#E7298A","#FFFFFF"))(7)[c(1:6)], "azure3")
profPal = rev(c(microshades_palette("micro_green", 5), microshades_palette("micro_cvd_turquoise", 5), microshades_palette("micro_cvd_orange", 3),microshades_palette("micro_cvd_purple", 1, lightest = F), microshades_palette("micro_purple", 5)))
colPalZoox = c("#6A4C93", "#1BE7FF", "#C6FCA2", "#FF6A8B")
Plot themes
dendTheme = theme(axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.line.x = element_blank(),
axis.ticks.x = element_blank(),
axis.title.y = element_text(size = 10, color = "black", angle = 90),
axis.text.y = element_text(size = 8, color = "black"),
axis.line.y = element_line(),
axis.ticks.y = element_line(),
panel.grid = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
legend.key.size = unit(0.75, "line"),
legend.key = element_blank(),
legend.title = element_text(size = 10),
legend.text = element_text(size = 8),
legend.direction = "horizontal",
legend.box = "vertical",
legend.position = c(0.13, 0.1))
pcaTheme = theme(axis.title.x = element_text(color = "black", size = 10),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.line.x = element_blank(),
axis.title.y = element_text(color = "black", size = 10),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
axis.line.y = element_blank(),
legend.position = "none",
legend.title = element_text(size = 10),
legend.text = element_text(size = 8),
legend.key.size = unit(5, "pt"),
panel.border = element_rect(color = "black", linewidth = 0.5),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
admixTheme = theme(
panel.grid = element_blank(),
panel.background = element_rect(fill = "gray85"),
plot.background = element_blank(),
panel.border = element_rect(fill = NA, color = "black", linewidth = 0.75, linetype = "solid"),
panel.spacing.x = grid:::unit(0.05, "lines"),
panel.spacing.y = grid:::unit(0.05, "lines"),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks.x = element_blank(),
axis.ticks.y = element_blank(),
axis.title = element_blank(),
strip.background.x = element_blank(),
strip.background.y = element_blank(),
strip.text = element_text(size = 8),
strip.text.y.left = element_text(size = 10, angle = 90),
strip.text.x.bottom = element_text(vjust = 1, color = NA),
legend.key = element_blank(),
legend.position = "none",
legend.title = element_text(size = 8))
violinTheme = theme(axis.title = element_text(color = "black", size = 12),
axis.ticks = element_blank(),
axis.text.x = element_text(color = "black", size = 10),
legend.position = "none",
legend.key.size = unit(0.3, 'cm'),
panel.border = element_rect(color = "black", linewidth = .5),
panel.background = element_blank(),
plot.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
zooxTheme = theme(plot.title = element_text(),
panel.grid = element_blank(),
# panel.background = element_blank(),
panel.background = element_rect(fill = "gray85"),
panel.border = element_rect(fill = NA, color = "black", linewidth = 0.75, linetype = "solid"),
panel.spacing.x = grid:::unit(0.05, "lines"),
panel.spacing.y = grid:::unit(0.82, "lines"),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks.x = element_blank(),
axis.ticks.y = element_blank(),
axis.title = element_blank(),
strip.background.x = element_blank(),
strip.background.y = element_blank(),
strip.text = element_text(size = 12),
strip.text.y.left = element_text(size = 12, angle = 90),
strip.text.x.bottom = element_text(vjust = -.1, color = NA),
legend.title = element_text(size = 10),
legend.text = element_text(size = 8),
legend.key.size = unit(0.75, "line"),
legend.key = element_blank(),
legend.position = "right",
legend.direction = "vertical",
legend.box = "vertical")
nwgomSites = read.csv("../data/nwgomMetaData.csv", header = TRUE)
nwgomSites$depthZone = factor(nwgomSites$depthZone)
nwgomSites$depthZone = factor(nwgomSites$depthZone, levels = levels(nwgomSites$depthZone)[c(2,1)])
nwgomSites$site = factor(nwgomSites$site)
nwgomSites$bank = factor(nwgomSites$bank)
nwgomSites$bank = factor(nwgomSites$bank, levels = levels(nwgomSites$bank)[c(5, 2, 1, 3, 4)])
nwgomSites$date = mdy(nwgomSites$date) %>% format("%d %b %Y")
nwgomBanks = nwgomSites %>% group_by(bank) %>% summarise(latDD = mean(latDD), longDD = mean(longDD), n = n()) %>% droplevels()
nwgomsampleSites = nwgomSites %>% group_by(bank, site, depthZone) %>% summarise(latDD = min(latDD), longDD = min(longDD))
## `summarise()` has grouped output by 'bank', 'site'. You can override using the
## `.groups` argument.
fgbnmsBounds = read_sf("../data/shp/FGBNMS_py.shp") %>% st_transform(crs = 4326)
fgbnmsBounds$Bank = c("NA","NA","Geyer","NA","Bright","WFGB","NA","McGrail","NA","NA","NA","McGrail","EFGB","NA","NA","NA","NA","NA","NA")
fgbnmsBounds$Bank = factor(fgbnmsBounds$Bank)
fgbnmsBounds$Bank = factor(fgbnmsBounds$Bank, levels = levels(fgbnmsBounds$Bank)[c(6, 2, 1, 3, 4,5)])
states = st_as_sf(ne_states(country = c("United States of America")), scale = "count", crs = 4326) %>% filter(name_en %in% c("Florida", "Alabama","Mississippi", "Louisiana", "Texas", "Georgia", "South Carolina"))
countries = st_as_sf(ne_countries(country = c("Cuba", "Mexico", "Cayman Islands", "The Bahamas", "Belize", "Guatemala"), scale = "Large"), crs = 4326)
# bahamas = read_sf("../data/shp/bahamasShoreline.shp") %>% st_transform(crs = 4326)
# cuba = read_sf("../data/shp/cubaShoreline.shp") %>% st_transform(crs = 4326)
# florida = read_sf("../data/shp/floridaShoreline.shp") %>% st_transform(crs = 4326)
wfgbBathy = read_sf("../data/shp/wb_10m_Cleaned.shp") %>% st_transform(crs = 4326)
efgbBathy = read_sf("../data/shp/eb_10m_Cleaned.shp") %>% st_transform(crs = 4326)
brightBathy = read_sf("../data/shp/Bright_10m_Cleaned.shp") %>% st_transform(crs = 4326)
geyerBathy = read_sf("../data/shp/Geyer_10m_Cleaned.shp") %>% st_transform(crs = 4326)
mcgrailBathy = read_sf("../data/shp/McGrail_10m_Cleaned.shp") %>% st_transform(crs = 4326)
nwgomMap = ggplot() +
geom_sf(data = fgbnmsBounds %>% filter(Bank != "NA"), aes(fill = Bank), linewidth = 0.25, color = "black") +
geom_sf(data = fgbnmsBounds %>% filter(Bank == "NA"), fill = "black", linewidth = 0.25, color = "black", alpha = 0.1) +
geom_sf(data = states, fill = "white", linewidth = 0.3) +
scale_fill_manual(values = c(gomPal,"black"), name = "Bank",) +
geom_point(data = nwgomSites, aes(x = longDD, y = latDD, shape = depthZone), color = NA, fill = NA) +
scale_shape_manual(values = c(21, 23), name = "Depth") +
coord_sf(xlim = c(-94.5, -91.5), ylim = c(26.75, 29.75)) +
annotation_scale(location = "br") +
guides(fill = guide_legend(override.aes = list(shape = 22, color = "black", size = 2.5, stroke = 0.25), order = 1), shape = guide_legend(override.aes = list(size = c(2.5, 2.25), stroke = 0.25, color = "black"), order = 2), color = "none") +
theme_bw() +
theme(panel.background = element_rect(fill = "aliceblue"),
plot.background = element_blank(),
panel.border = element_rect(color = "black", size = 1, fill = NA),
axis.title = element_blank(),
axis.ticks = element_line(color = "black"),
axis.text = element_text(color = "black"),
legend.position = c(0.9085, 0.882),
legend.box.background = element_rect(linewidth = 0.45, fill = "white"),
legend.title = element_text(color = "black", size = 8),
legend.text = element_text(color = "black", size = 8),
legend.spacing = unit(-5, "pt"),
legend.key.size = unit(5, "pt"),
legend.key = element_blank(),
legend.background = element_blank()
)
nwgomMap
largeMap = ggplot() +
geom_sf(data = states, fill = "white", linewidth = 0.3) +
geom_sf(data = countries, fill = "white", linewidth = 0.3) +
geom_sf(data = fgbnmsBounds, fill = "black", linewidth = 0.1, color = "black", alpha = 0.1) +
geom_rect(aes(xmin = -95, xmax = -91, ymin = 26, ymax = 30), fill = NA, color = "black", linewidth = 0.75) +
coord_sf(xlim = c(-99, -80), ylim = c(18, 32)) +
theme_bw() +
theme(legend.title = element_text(size = 9, face = "bold"),
axis.ticks = element_blank(),
axis.text = element_blank(),
axis.title = element_blank(),
panel.background = element_rect(fill = "aliceblue"),
panel.border = element_rect(color = "black", size = 1, fill = NA),
legend.position = "none",
plot.background = element_blank())
largeMap
inset = ggplot() +
geom_sf(data = wfgbBathy, color = "gray75", size = 0.25) +
geom_sf(data = efgbBathy, color = "gray75", size = 0.25) +
geom_sf(data = brightBathy, color = "gray75", size = 0.25) +
geom_sf(data = geyerBathy, color = "gray75", size = 0.25) +
geom_sf(data = mcgrailBathy, color = "gray75", size = 0.25) +
geom_sf(data = fgbnmsBounds %>% filter(Bank != "NA"), aes(fill = Bank), linewidth = 0.4, color = "black", alpha = 0.2) +
geom_sf(data = fgbnmsBounds %>% filter(Bank == "NA"), fill = "black", linewidth = 0.4, color = "black", alpha = 0.2) +
geom_point(data = nwgomsampleSites, aes(x = longDD, y = latDD, fill = bank, shape = depthZone, size = depthZone), alpha = 0.75) +
scale_fill_manual(values = gomPal, name = "Bank") +
scale_shape_manual(values = c(21, 23), name = "Depth") +
scale_size_manual(values = c(2.5, 2.25)) +
theme_bw() +
theme(legend.title = element_text(size = 9, face = "bold"),
axis.ticks = element_blank(),
axis.text = element_blank(),
axis.title = element_blank(),
panel.background = element_rect(fill = "aliceblue"),
panel.border = element_rect(color = "black", size = 1, fill = NA),
legend.position = "none",
plot.background = element_blank())
wfgb = inset +
geom_label(aes(x = -93.8269, y = 27.882), label = "WFGB", fill = gomPal[1], color = "black", size = 3) +
annotation_scale(location = "br") +
coord_sf(xlim = c(-93.83, -93.808), ylim = c(27.867, 27.883)) +
scale_x_continuous(n.breaks = 10) +
scale_y_continuous(n.breaks = 10)
efgb = inset +
geom_label(aes(x = -93.6074, y = 27.92435), label = "EFGB", fill = gomPal[2], color = "black", size = 3) +
annotation_scale(location = "br") +
coord_sf(xlim = c(-93.61, -93.59), ylim = c(27.905, 27.925)) +
scale_x_continuous(n.breaks = 10) +
scale_y_continuous(n.breaks = 10)
bright = inset +
geom_label(aes(x = -93.3135, y = 27.8984), label = "Bright", fill = gomPal[3], color = "black", size = 3) +
annotation_scale(location = "br") +
coord_sf(xlim = c(-93.316, -93.296), ylim = c(27.879, 27.899)) +
scale_x_continuous(n.breaks = 10) +
scale_y_continuous(n.breaks = 10)
geyer = inset +
geom_label(aes(x = -93.07329, y = 27.8614), label = "Geyer", fill = gomPal[4], color = "black", size = 3) +
annotation_scale(location = "br") +
coord_sf(xlim = c(-93.076, -93.056), ylim = c(27.842, 27.862)) +
scale_x_continuous(n.breaks = 10) +
scale_y_continuous(n.breaks = 10)
mcgrail = inset +
geom_label(aes(x = -92.6018, y = 27.9691), label = "McGrail", fill = gomPal[5], color = "black", size = 3) +
annotation_scale(location = "br") +
coord_sf(xlim = c(-92.605, -92.585), ylim = c(27.955, 27.97)) +
scale_x_continuous(n.breaks = 10) +
scale_y_continuous(n.breaks = 10)
map = (nwgomMap + inset_element(largeMap, top = 1.12, right = 0.33, bottom = 0.63, left = -0.0075, ignore_tag = TRUE) +
inset_element(wfgb, top = 0.8, right = 0.2875, bottom = 0.5, left = -0.0075, ignore_tag = TRUE) +
inset_element(efgb, top = 0.33, right = 0.2875, bottom = 0.04, left = -0.0075, ignore_tag = TRUE) +
inset_element(bright, top = 0.33, right = 0.68, bottom = 0.04, left = 0.33, ignore_tag = TRUE) +
inset_element(geyer, top = 0.33, right = 1.0, bottom = 0.04, left = 0.705, ignore_tag = TRUE) +
inset_element(mcgrail, top = 0.8, right = 1.0, bottom = 0.5, left = 0.7, ignore_tag = TRUE)
)
ggsave("../figures/figure1.png", plot = map, height = 7, width = 7, units = "in", dpi = 300)
mcavCloneBams = read.csv("../data/nwgomMetaData.csv") %>% dplyr::filter(species == "M. cavernosa") %>% dplyr::filter(!sample %in% c("MGM010")) # list of bam files
mcavCloneMa = as.matrix(read.table("../data/mcav/mcavClones.ibsMat")) # reads in IBS matrix produced by ANGSD
dimnames(mcavCloneMa) = list(mcavCloneBams[,1],mcavCloneBams[,1])
mcavClonesHc = hclust(as.dist(mcavCloneMa),"ave")
mcavClonePops = mcavCloneBams$bank
mcavCloneDepth = mcavCloneBams$depthZone
mcavCloneDend = mcavCloneMa %>% as.dist() %>% hclust(.,"ave") %>% as.dendrogram()
mcavCloneDData = mcavCloneDend %>% dendro_data()
# Making the branches hang shorter so we can easily see clonal groups
mcavCloneDData$segments$yend2 = mcavCloneDData$segments$yend
for(i in 1:nrow(mcavCloneDData$segments)) {
if (mcavCloneDData$segments$yend2[i] == 0) {
mcavCloneDData$segments$yend2[i] = (mcavCloneDData$segments$y[i] - 0.01)}}
mcavCloneDendPoints = mcavCloneDData$labels
mcavCloneDendPoints$pop = mcavClonePops[order.dendrogram(mcavCloneDend)]
mcavCloneDendPoints$depth=mcavCloneDepth[order.dendrogram(mcavCloneDend)]
rownames(mcavCloneDendPoints) = mcavCloneDendPoints$label
# Making points at the leaves to place symbols for populations
point = as.vector(NA)
for(i in 1:nrow(mcavCloneDData$segments)) {
if (mcavCloneDData$segments$yend[i] == 0) {
point[i] = mcavCloneDData$segments$y[i] - 0.01
} else {
point[i] = NA}}
mcavCloneDendPoints$y = point[!is.na(point)]
mcavTechReps = c("MGM002.1", "MGM002.2", "MGM002.3", "MGM008.1", "MGM008.2", "MGM008.3", "MGM013.1", "MGM013.2", "MGM013.3", "MGM024.1", "MGM024.2", "MGM038.1", "MGM038.2", "MGM038.3", "MGM072.1", "MGM072.2", "MGM072.3")
mcavCloneDendPoints$depth = factor(mcavCloneDendPoints$depth)
mcavCloneDendPoints$depth = factor(mcavCloneDendPoints$depth,levels(mcavCloneDendPoints$depth)[c(2,1)])
mcavCloneDendPoints$pop = factor(mcavCloneDendPoints$pop)
mcavCloneDendPoints$pop = factor(mcavCloneDendPoints$pop,levels(mcavCloneDendPoints$pop)[c(5, 2, 1, 4, 3)])
mcavCloneDendA = ggplot() +
geom_segment(data = segment(mcavCloneDData), aes(x = x, y = y, xend = xend, yend = yend2), size = 0.5) +
geom_point(data = mcavCloneDendPoints, aes(x = x, y = y, fill = pop, shape = depth), size = 4, stroke = 0.25) +
scale_fill_manual(values =gomPal, name= "Bank")+
scale_shape_manual(values = c(21, 23), name = "Depth Zone")+
geom_hline(yintercept = 0.1575, color = pink, lty = 5, size = 0.75) + # creating a dashed line to indicate a clonal distance threshold
geom_text(data = subset(mcavCloneDendPoints, subset = label %in% mcavTechReps), aes(x = x, y = (y - .015), label = label), angle = 90) + # spacing technical replicates further from leaf
geom_text(data = subset(mcavCloneDendPoints, subset = !label %in% mcavTechReps), aes(x = x, y = (y - .010), label = label), angle = 90) +
labs(y = "Genetic distance (1 - IBS)") +
guides(fill = guide_legend(override.aes = list(shape = 22)))+
coord_cartesian(xlim = c(0,182), ylim = c(0.075, 0.3),expand = FALSE) +
theme_classic()
mcavCloneDend = mcavCloneDendA + theme(
axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.line.x = element_blank(),
axis.ticks.x = element_blank(),
axis.title.y = element_text(size = 12, color = "black", angle = 90),
axis.text.y = element_text(size = 10, color = "black"),
axis.line.y = element_line(),
axis.ticks.y = element_line(),
panel.grid = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
plot.background = element_blank(),
legend.key = element_blank(),
legend.title = element_text(size = 12),
legend.text = element_text(size = 10),
legend.position = "bottom")
mcavCloneDend
ggsave("../figures/extras/mcavCloneDend.png", plot = mcavCloneDend, height = 9, width = 32, units = "in", dpi = 300)
sintCloneBams = read.csv("../data/nwgomMetaData.csv") %>% dplyr::filter(species == "S. intersepta") %>% dplyr::filter(!sample %in% c("SGM013", "SGM169", "SGM177"))
sintCloneMa = as.matrix(read.table("../data/sint/sintClones.ibsMat")) # reads in IBS matrix produced by ANGSD
dimnames(sintCloneMa) = list(sintCloneBams[,1],sintCloneBams[,1])
sintClonesHc = hclust(as.dist(sintCloneMa),"ave")
sintClonePops = sintCloneBams$bank
sintCloneDepth = sintCloneBams$depthZone
sintCloneDend = sintCloneMa %>% as.dist() %>% hclust(.,"ave") %>% as.dendrogram()
sintCloneDData = sintCloneDend %>% dendro_data()
# Making the branches hang shorter so we can easily see clonal groups
sintCloneDData$segments$yend2 = sintCloneDData$segments$yend
for(i in 1:nrow(sintCloneDData$segments)) {
if (sintCloneDData$segments$yend2[i] == 0) {
sintCloneDData$segments$yend2[i] = (sintCloneDData$segments$y[i] - 0.01)}}
sintCloneDendPoints = sintCloneDData$labels
sintCloneDendPoints$pop = sintClonePops[order.dendrogram(sintCloneDend)]
sintCloneDendPoints$depth=sintCloneDepth[order.dendrogram(sintCloneDend)]
rownames(sintCloneDendPoints) = sintCloneDendPoints$label
# Making points at the leaves to place symbols for populations
point = as.vector(NA)
for(i in 1:nrow(sintCloneDData$segments)) {
if (sintCloneDData$segments$yend[i] == 0) {
point[i] = sintCloneDData$segments$y[i] - 0.01
} else {
point[i] = NA}}
sintCloneDendPoints$y = point[!is.na(point)]
sintTechReps = c("SGM027.1", "SGM027.2", "SGM027.3", "SGM046.1", "SGM046.2", "SGM046.3", "SGM152.1", "SGM152.2", "SGM152.3", "SGM186.1", "SGM186.2", "SGM186.3", "SGM197.1", "SGM197.2", "SGM197.3", "SGM206.1", "SGM206.2", "SGM206.3")
sintCloneDendPoints$depth = factor(sintCloneDendPoints$depth)
sintCloneDendPoints$depth = factor(sintCloneDendPoints$depth,levels(sintCloneDendPoints$depth)[c(2,1)])
sintCloneDendPoints$pop = factor(sintCloneDendPoints$pop)
sintCloneDendPoints$pop = factor(sintCloneDendPoints$pop,levels(sintCloneDendPoints$pop)[c(5, 2, 1, 4, 3)])
sintCloneDendA = ggplot() +
geom_segment(data = segment(sintCloneDData), aes(x = x, y = y, xend = xend, yend = yend2), size = 0.5) +
geom_point(data = sintCloneDendPoints, aes(x = x, y = y, fill = pop, shape = depth), size = 4, stroke = 0.25) +
scale_fill_manual(values = gomPal, name= "Bank")+
scale_shape_manual(values = c(24, 25), name = "Depth Zone")+
geom_hline(yintercept = 0.154, color = pink, lty = 5, size = 0.75) + # creating a dashed line to indicate a clonal distance threshold
geom_text(data = subset(sintCloneDendPoints, subset = label %in% sintTechReps), aes(x = x, y = (y - .015), label = label), angle = 90) + # spacing technical replicates further from leaf
geom_text(data = subset(sintCloneDendPoints, subset = !label %in% sintTechReps), aes(x = x, y = (y - .010), label = label), angle = 90) +
labs(y = "Genetic distance (1 - IBS)") +
guides(fill = guide_legend(override.aes = list(shape = 22)))+
coord_cartesian(xlim = c(0, 220), ylim = c(0.075, 0.3), expand = FALSE) +
theme_classic()
sintCloneDend = sintCloneDendA + theme(
axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.line.x = element_blank(),
axis.ticks.x = element_blank(),
axis.title.y = element_text(size = 12, color = "black", angle = 90),
axis.text.y = element_text(size = 10, color = "black"),
axis.line.y = element_line(),
axis.ticks.y = element_line(),
panel.grid = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
plot.background = element_blank(),
legend.key = element_blank(),
legend.title = element_text(size = 12),
legend.text = element_text(size = 10),
legend.position = "bottom")
sintCloneDend
ggsave("../figures/extras/sintCloneDend.png", plot = sintCloneDend, height = 9, width = 30, units = "in", dpi = 300)
ofavCloneBams = read.csv("../data/nwgomMetaData.csv") %>% dplyr::filter(species == "O. faveolata") %>% dplyr::filter(!sample %in% c("OGM003", "OGM043", "OGM075", "OGM076", "OGM121"))
ofavCloneMa = as.matrix(read.table("../data/ofav/ofavClones.ibsMat")) # reads in IBS matrix produced by ANGSD
dimnames(ofavCloneMa) = list(ofavCloneBams[,1],ofavCloneBams[,1])
ofavClonesHc = hclust(as.dist(ofavCloneMa),"ave")
ofavClonePops = ofavCloneBams$bank
ofavCloneDepth = ofavCloneBams$depthZone
ofavCloneDend = ofavCloneMa %>% as.dist() %>% hclust(.,"ave") %>% as.dendrogram()
ofavCloneDData = ofavCloneDend %>% dendro_data()
# Making the branches hang shorter so we can easily see clonal groups
ofavCloneDData$segments$yend2 = ofavCloneDData$segments$yend
for(i in 1:nrow(ofavCloneDData$segments)) {
if (ofavCloneDData$segments$yend2[i] == 0) {
ofavCloneDData$segments$yend2[i] = (ofavCloneDData$segments$y[i] - 0.01)}}
ofavCloneDendPoints = ofavCloneDData$labels
ofavCloneDendPoints$pop = ofavClonePops[order.dendrogram(ofavCloneDend)]
ofavCloneDendPoints$depth=ofavCloneDepth[order.dendrogram(ofavCloneDend)]
rownames(ofavCloneDendPoints) = ofavCloneDendPoints$label
# Making points at the leaves to place symbols for populations
point = as.vector(NA)
for(i in 1:nrow(ofavCloneDData$segments)) {
if (ofavCloneDData$segments$yend[i] == 0) {
point[i] = ofavCloneDData$segments$y[i] - 0.01
} else {
point[i] = NA}}
ofavCloneDendPoints$y = point[!is.na(point)]
ofavTechReps = c("OGM024.1", "OGM024.2", "OGM024.3", "OGM066.1", "OGM066.2", "OGM066.3", "OGM108.1", "OGM108.2", "OGM108.3")
ofavCloneDendPoints$depth = factor(ofavCloneDendPoints$depth)
ofavCloneDendPoints$depth = factor(ofavCloneDendPoints$depth,levels(ofavCloneDendPoints$depth)[c(2,1)])
ofavCloneDendPoints$pop = factor(ofavCloneDendPoints$pop)
ofavCloneDendPoints$pop = factor(ofavCloneDendPoints$pop,levels(ofavCloneDendPoints$pop)[c(5, 2, 1, 4, 3)])
ofavCloneDendA = ggplot() +
geom_segment(data = segment(ofavCloneDData), aes(x = x, y = y, xend = xend, yend = yend2), size = 0.5) +
geom_point(data = ofavCloneDendPoints, aes(x = x, y = y, fill = pop, shape = depth), size = 4, stroke = 0.25) +
scale_fill_manual(values = gomPal, name= "Bank")+
scale_shape_manual(values = c(24, 25), name = "Depth Zone")+
geom_hline(yintercept = 0.1425, color = pink, lty = 5, size = 0.75) + # creating a dashed line to indicate a clonal distance threshold
geom_text(data = subset(ofavCloneDendPoints, subset = label %in% ofavTechReps), aes(x = x, y = (y - .015), label = label), angle = 90) + # spacing technical replicates further from leaf
geom_text(data = subset(ofavCloneDendPoints, subset = !label %in% ofavTechReps), aes(x = x, y = (y - .010), label = label), angle = 90) +
labs(y = "Genetic distance (1 - IBS)") +
guides(fill = guide_legend(override.aes = list(shape = 22)))+
coord_cartesian(xlim = c(0,137), ylim = c(0.075, 0.3), expand = FALSE) +
theme_classic()
ofavCloneDend = ofavCloneDendA + theme(
axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.line.x = element_blank(),
axis.ticks.x = element_blank(),
axis.title.y = element_text(size = 12, color = "black", angle = 90),
axis.text.y = element_text(size = 10, color = "black"),
axis.line.y = element_line(),
axis.ticks.y = element_line(),
panel.grid = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
plot.background = element_blank(),
legend.key = element_blank(),
legend.title = element_text(size = 12),
legend.text = element_text(size = 10),
legend.position = "bottom")
ofavCloneDend
ggsave("../figures/extras/ofavCloneDend.png", plot = ofavCloneDend, height = 9, width = 30, units = "in", dpi = 300)
xestoCloneBams = read.csv("../data/nwgomMetaData.csv") %>% dplyr::filter(species == "X. muta") %>% dplyr::filter(!sample %in% c("XGM013", "XGM067", "XGM129"))
xestoCloneMa = as.matrix(read.table("../data/xesto/xestoClones.ibsMat")) # reads in IBS matrix produced by ANGSD
dimnames(xestoCloneMa) = list(xestoCloneBams[,1],xestoCloneBams[,1])
xestoClonesHc = hclust(as.dist(xestoCloneMa),"ave")
xestoClonePops = xestoCloneBams$bank
xestoCloneDepth = xestoCloneBams$depthZone
xestoCloneDend = xestoCloneMa %>% as.dist() %>% hclust(.,"ave") %>% as.dendrogram()
xestoCloneDData = xestoCloneDend %>% dendro_data()
# Making the branches hang shorter so we can easily see clonal groups
xestoCloneDData$segments$yend2 = xestoCloneDData$segments$yend
for(i in 1:nrow(xestoCloneDData$segments)) {
if (xestoCloneDData$segments$yend2[i] == 0) {
xestoCloneDData$segments$yend2[i] = (xestoCloneDData$segments$y[i] - 0.01)}}
xestoCloneDendPoints = xestoCloneDData$labels
xestoCloneDendPoints$pop = xestoClonePops[order.dendrogram(xestoCloneDend)]
xestoCloneDendPoints$depth=xestoCloneDepth[order.dendrogram(xestoCloneDend)]
rownames(xestoCloneDendPoints) = xestoCloneDendPoints$label
# Making points at the leaves to place symbols for populations
point = as.vector(NA)
for(i in 1:nrow(xestoCloneDData$segments)) {
if (xestoCloneDData$segments$yend[i] == 0) {
point[i] = xestoCloneDData$segments$y[i] - 0.01
} else {
point[i] = NA}}
xestoCloneDendPoints$y = point[!is.na(point)]
xestoTechReps = c("XGM034.1", "XGM034.2", "XGM034.3", "XGM072.1", "XGM072.2", "XGM072.3", "XGM158.1", "XGM158.2", "XGM158.3", "XGM179.1", "XGM179.2", "XGM179.3", "XGM193.1", "XGM193.2", "XGM193.3", "XGM203.1", "XGM203.2", "XGM203.3")
xestoCloneDendPoints$depth = factor(xestoCloneDendPoints$depth)
xestoCloneDendPoints$depth = factor(xestoCloneDendPoints$depth,levels(xestoCloneDendPoints$depth)[c(2,1)])
xestoCloneDendPoints$pop = factor(xestoCloneDendPoints$pop)
xestoCloneDendPoints$pop = factor(xestoCloneDendPoints$pop,levels(xestoCloneDendPoints$pop)[c(5, 2, 1, 4, 3)])
xestoCloneDendA = ggplot() +
geom_segment(data = segment(xestoCloneDData), aes(x = x, y = y, xend = xend, yend = yend2), size = 0.5) +
geom_point(data = xestoCloneDendPoints, aes(x = x, y = y, fill = pop, shape = depth), size = 4, stroke = 0.25) +
scale_fill_manual(values = gomPal, name= "Bank")+
scale_shape_manual(values = c(24, 25), name = "Depth Zone")+
geom_hline(yintercept = 0.174, color = pink, lty = 5, linewidth = 0.75) + # creating a dashed line to indicate a clonal distance threshold
geom_text(data = subset(xestoCloneDendPoints, subset = label %in% xestoTechReps), aes(x = x, y = (y - .015), label = label), angle = 90) + # spacing technical replicates further from leaf
geom_text(data = subset(xestoCloneDendPoints, subset = !label %in% xestoTechReps), aes(x = x, y = (y - .010), label = label), angle = 90) +
labs(y = "Genetic distance (1 - IBS)") +
guides(fill = guide_legend(override.aes = list(shape = 22)))+
coord_cartesian(xlim = c(0, 217), ylim = c(0.075, 0.3), expand = FALSE) +
theme_classic()
xestoCloneDend = xestoCloneDendA + theme(
axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.line.x = element_blank(),
axis.ticks.x = element_blank(),
axis.title.y = element_text(size = 12, color = "black", angle = 90),
axis.text.y = element_text(size = 10, color = "black"),
axis.line.y = element_line(),
axis.ticks.y = element_line(),
panel.grid = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
plot.background = element_blank(),
legend.key = element_blank(),
legend.title = element_text(size = 12),
legend.text = element_text(size = 10),
legend.position = "bottom")
# xestoCloneDend
ggsave("../figures/extras/xestoCloneDend.png", plot = xestoCloneDend, height = 8, width = 35, units = "in", dpi = 300)
mcavBams = read.csv("../data/nwgomMetaData.csv") %>% dplyr::filter(species == "M. cavernosa") %>% dplyr::filter(!sample %in% c("MGM072.1", "MGM072.3", "MGM013.2", "MGM013.3", "MGM008.1", "MGM008.2", "MGM010", "MGM024.1", "MGM004", "MGM002.2", "MGM002.3", "MGM038.2", "MGM038.3"))# list of bams files and their populations (tech reps removed)
mcavBams$sample <- gsub("\\.[1-3]*$", "", mcavBams$sample)
mcavBams$bank = factor(mcavBams$bank)
mcavBams$bank = factor(mcavBams$bank, levels = levels(mcavBams$bank)[c(5, 2, 1, 3, 4)])
mcavBams$depthZone = factor(mcavBams$depthZone)
mcavBams$depthZone = factor(mcavBams$depthZone, levels = levels(mcavBams$depthZone)[c(2, 1)])
mcavMa = as.matrix(read.table("../data/mcav/mcavNoClones.ibsMat")) # reads in IBS matrix produced by ANGSD
dimnames(mcavMa) = list(mcavBams[,3],mcavBams[,3])
## admixture K = 2
#-------------------------------------
mcavK2ad = read.table("../data/mcav/admix/mcavK2.output") %>% dplyr::select(V6, V7)
mcavK2ad %>% summarise(sum(V6),sum(V7))
## sum(V6) sum(V7)
## 1 119.4213 49.5787
mcavAdmixK2 = mcavBams %>%
dplyr::select(sample, bank, "depth" = depthZone, "depthm" = depthM) %>%
cbind(mcavK2ad) %>%
dplyr::rename("Mc1" = "V7", "Mc2" = "V6") %>% dplyr::select(order(colnames(.))) %>% dplyr::relocate(sample)
mcavAdmixK2_melt = melt(mcavAdmixK2, id = c("sample", "bank", "depth", "depthm"))
## admixture K = 3
#-------------------------------------
mcavK3ad = read.table("../data/mcav/admix/mcavK3.output") %>% dplyr::select(V6, V7, V8)
mcavK3ad %>% summarise(sum(V6),sum(V7), sum(V8))
## sum(V6) sum(V7) sum(V8)
## 1 65.4637 43.7152 59.8217
mcavAdmixK3 = mcavBams %>%
dplyr::select(sample, bank, "depth" = depthZone, "depthm" = depthM) %>%
cbind(mcavK3ad) %>%
dplyr::rename("Mc1" = "V7", "Mc2" = "V6",, "Mc3" = "V8") %>%
dplyr::select(order(colnames(.)))
mcavAdmixK3_melt = melt(mcavAdmixK3, id = c("sample", "bank", "depth", "depthm"))
## ngsAdmix K = 4
#-------------------------------------
mcavK4ad = read.table("../data/mcav/admix/mcavK4.output") %>% dplyr::select(V6, V7, V8, V9)
mcavK4ad %>% summarise(sum(V6),sum(V7), sum(V8), sum(V9))
## sum(V6) sum(V7) sum(V8) sum(V9)
## 1 43.3693 39.3483 41.005 45.2775
mcavAdmixK4 = mcavBams %>%
dplyr::select(sample, bank, "depth" = depthZone, "depthm" = depthM) %>%
cbind(mcavK4ad) %>%
dplyr::rename("Mc1" = "V7", "Mc2" = "V6", "Mc3" = "V8", "Mc4" = "V9") %>%
dplyr::select(order(colnames(.)))
mcavAdmixK4_melt = melt(mcavAdmixK4, id = c("sample", "bank", "depth", "depthm"))
## admixture K = 5
#-------------------------------------
mcavK5ad = read.table("../data/mcav/admix/mcavK5.output") %>% dplyr::select(V6, V7, V8, V9, V10)
mcavK5ad %>% summarise(sum(V6),sum(V7), sum(V8), sum(V9), sum(V10))
## sum(V6) sum(V7) sum(V8) sum(V9) sum(V10)
## 1 39.392 33.285 36.313 35.6101 24.3984
mcavAdmixK5 = mcavBams %>%
dplyr::select(sample, bank, "depth" = depthZone, "depthm" = depthM) %>%
cbind(mcavK5ad) %>%
dplyr::rename("Mc1" = "V7", "Mc2" = "V6", "Mc3" = "V8", "Mc4" = "V9", "Mc5" = "V10") %>% dplyr::select(order(colnames(.)))
mcavAdmixK5_melt = melt(mcavAdmixK5, id = c("sample", "bank", "depth", "depthm"))
## admixture K = 6
#-------------------------------------
mcavK6ad = read.table("../data/mcav/admix/mcavK6.output") %>% dplyr::select(V6, V7, V8, V9, V10,V11)
mcavK6ad %>% summarise(sum(V6),sum(V7), sum(V8), sum(V9), sum(V10), sum(V11))
## sum(V6) sum(V7) sum(V8) sum(V9) sum(V10) sum(V11)
## 1 32.9952 29.8928 30.4508 29.7122 20.8536 25.0957
mcavAdmixK6 = mcavBams %>%
dplyr::select(sample, bank, "depth" = depthZone, "depthm" = depthM) %>%
cbind(mcavK6ad) %>%
dplyr::rename("Mc1" = "V7", "Mc2" = "V6", "Mc3" = "V8", "Mc4" = "V9", "Mc5" = "V10", "Mc6" = "V11") %>% dplyr::select(order(colnames(.)))
mcavAdmixK6_melt = melt(mcavAdmixK6, id = c("sample", "bank", "depth", "depthm"))
## construct figure
#-------------------------------------
# ggtree(tr) + geom_nodelab(aes(label = node), hjust = -0.5)
{
mcavTr = hclust(dist(mcavMa),"ave")
mcavP1 = ggtree(mcavTr, layout = "rectangular", size = 0.35)
mcavP2 = facet_plot(mcavP1, panel = "location", data=mcavAdmixK2, geom = geom_tile, mapping = aes(x = 1, fill = bank), color = "grey25", size = 0.1) +
scale_fill_manual("Site", values = gomPal, guide = guide_legend(order = 1)) +
new_scale_fill()
mcavP3 = facet_plot(mcavP2, panel = "depth zone", data=mcavAdmixK2, geom = geom_tile, mapping = aes(x = 1, fill = depth), color = "grey25", size = 0.1) +
scale_fill_manual("Depth zone", values = c("#A7E1BCFF", "#414083FF"), guide = guide_legend(order = 2)) +
new_scale_fill()
mcavP4 = facet_plot(mcavP3, panel = "depth", data=mcavAdmixK2, geom = geom_tile, mapping = aes(x = 1, fill = depthm), color = "grey25", size = 0.1) +
scale_fill_viridis_c("Depth (m)", option = "mako", trans = "reverse", limits = c(60, 10)) +
new_scale_fill()
mcavP5 = facet_plot(mcavP4, panel="K = 2", data=mcavAdmixK2_melt, geom=geom_barh, width = 1.0, mapping = aes(x = value, fill = variable),
stat='identity', color = "grey25", size = 0.1, show.legend = FALSE) +
scale_fill_manual("Lineage",values = mcavKColPal[1:6])
mcavP6 = facet_plot( mcavP5, panel="K = 3", data=mcavAdmixK3_melt, geom=geom_barh, width = 1.0, mapping = aes(x = value, fill = variable),
stat='identity', color = "grey25", size = 0.1, show.legend = FALSE)
mcavP7 = facet_plot(mcavP6, panel="K = 4", data=mcavAdmixK4_melt, geom=geom_barh, width = 1.0, mapping = aes(x = value, fill = variable),
stat='identity', color = "grey25", size = 0.1, show.legend = FALSE)
mcavP8 = facet_plot(mcavP7, panel="K = 5", data=mcavAdmixK5_melt, geom=geom_barh, width = 1.0, mapping = aes(x = value, fill = variable),
stat='identity', color = "grey25", size = 0.1, show.legend = FALSE)
mcavP9 = facet_plot(mcavP8, panel="K = 6", data=mcavAdmixK6_melt, geom=geom_barh, width = 1.0, mapping = aes(x = value, fill = variable),
stat='identity', color = "grey25", size = 0.1, show.legend = FALSE) +
theme_tree(strip.text = element_blank(),
panel.spacing = unit(.1, "line"))
}
mcavImg = image_read("../figures/icons/mcav.png") %>% image_ggplot()
mcavStructure = facet_widths(mcavP9, widths = c(0.5, 0.025, 0.025, 0.025, 0.2, 0.1, 0.1, 0.1, 0.1)) #+ inset_element(mcavImg, 0.0, 0.9, 0.1, 1.0, align_to = "full", ignore_tag = TRUE)
mcavStructure
mcavData = read.csv("../data/nwgomMetaData.csv") %>% dplyr::filter(species == "M. cavernosa") %>% dplyr::filter(!sample %in% c("MGM072.1", "MGM072.3", "MGM013.2", "MGM013.3", "MGM008.1", "MGM008.2", "MGM010", "MGM024.1", "MGM004", "MGM002.2", "MGM002.3", "MGM038.2", "MGM038.3"))
mcavData$depthZone = factor(mcavData$depthZone)
mcavData$depthZone = factor(mcavData$depthZone, levels(mcavData$depthZone)[c(2,1)])
mcavData$bank = factor(mcavData$bank)
mcavData$bank = factor(mcavData$bank,levels(mcavData$bank)[c(5, 2, 1, 4, 3)])
mcavPcadmix = read.table("../data/mcav/admix/mcavK2.output") %>%dplyr::select(V6, V7)
mcavPcadmix %>% summarise(sum(V6),sum(V7))
## sum(V6) sum(V7)
## 1 119.4213 49.5787
mcavPcadmix = mcavData %>% cbind(mcavPcadmix) %>% rename("cluster1" = "V7", "cluster2" = "V6") %>%dplyr::select(order(colnames(.)))
mcavPcadmixClust = mcavPcadmix %>% mutate(cluster = ifelse(cluster1 < 0.75 & cluster2 < 0.75, "NA", ifelse(cluster1 >=0.75, 1, ifelse(cluster2 >= 0.75, 2, 0))))
mcavPcadmix = mcavPcadmix %>% mutate(mcavPcadmixClust)
mcavPcadmix$cluster = as.factor(mcavPcadmix$cluster)
levels(mcavPcadmix$cluster) = c("Mc_1", "Mc_2", "Admixed")
mcavSiteLineages = mcavPcadmix %>% dplyr::select(depthZone, cluster) %>%
group_by(depthZone) %>% count(cluster) %>% mutate(Freq = n/sum(n)) %>% apply(2, function(x) gsub("\\s+", "", x)) %>% as.data.frame()
mcavCov = read.table("../data/mcav/mcavPcangsd.cov") %>% as.matrix()
mcavPcAdmix = read.table("../data/mcav/admix/mcavK2.output") %>%dplyr::select(V6, V7)
mcavPcAdmix %>% summarise(sum(V6),sum(V7))
## sum(V6) sum(V7)
## 1 119.4213 49.5787
mcavPcAdmix = mcavPcAdmix %>% rename("cluster1" = "V7", "cluster2" = "V6") %>% dplyr::select(order(colnames(.)))
mcavPcaEig = eigen(mcavCov)
mcavPcaVar = mcavPcaEig$values/sum(mcavPcaEig$values)*100
head(mcavPcaVar)
## [1] 2.9308183 0.8264292 0.8100717 0.8070106 0.7996972 0.7900787
mcavPcangsd = read.csv("../data/nwgomMetaData.csv") %>% dplyr::filter(species == "M. cavernosa") %>% dplyr::filter(!sample %in% c("MGM072.1", "MGM072.3", "MGM013.2", "MGM013.3", "MGM008.1", "MGM008.2", "MGM010", "MGM024.1", "MGM004", "MGM002.2", "MGM002.3", "MGM038.2", "MGM038.3")) %>% dplyr::select("sample" = sample, "bank" = bank, "depth" = depthZone, "depthm" = depthM)
mcavPcangsd$sitedepth = as.factor(paste(mcavPcangsd$bank, mcavPcangsd$depth, sep = " "))
mcavPcangsd$sitedepth = factor(mcavPcangsd$sitedepth, levels(mcavPcangsd$sitedepth)[c(7, 6, 3, 2, 1, 4, 5)])
mcavPcangsd$bank = factor(mcavPcangsd$bank)
mcavPcangsd$bank = factor(mcavPcangsd$bank, levels(mcavPcangsd$bank)[c(5, 2, 1, 3, 4)])
mcavPcangsd$depth = factor(mcavPcangsd$depth)
mcavPcangsd$depth = factor(mcavPcangsd$depth, levels(mcavPcangsd$depth)[c(2, 1)])
mcavPcangsd$PC1 = mcavPcaEig$vectors[,1]
mcavPcangsd$PC2 = mcavPcaEig$vectors[,2]
mcavPcangsd$PC3 = mcavPcaEig$vectors[,3]
mcavPcangsd$PC4 = mcavPcaEig$vectors[,4]
mcavPcangsdClust = mcavPcAdmix %>% mutate(cluster = ifelse(cluster1 < 0.75 & cluster2 < 0.75, "NA", ifelse(cluster1 >=0.75, 1, ifelse(cluster2 >= 0.75, 2, 0))))
# pcangsdClust$clusterX = as.vector(d_clust$classification)
mcavPcangsd = mcavPcangsd %>% mutate(mcavPcangsdClust)
mcavPcangsd$cluster = as.factor(mcavPcangsd$cluster)
levels(mcavPcangsd$cluster) = c("Mc_Deep", "Mc_1", "Admixed")
mcavBamsClusters = mcavPcangsd %>% dplyr::select(sample, cluster) %>% dplyr::arrange(sample)
mcavBamssamples = read.delim("../data/mcav/bamsNoClones", header = FALSE)
mcavBamsClusters$sample = mcavBamssamples$V1
# bamsClusters = as.data.frame(bamsClusters)
write.table(x = mcavBamsClusters, file = "../data/mcav/mcavBamsClusters", sep = "\t", row.names = FALSE, col.names = FALSE, quote = FALSE)
mcavPcangsd = merge(mcavPcangsd, aggregate(cbind(mean.x = PC1, mean.y = PC2)~sitedepth, mcavPcangsd, mean), by="sitedepth")
mcavPcangsd %>% dplyr::group_by(depth,cluster) %>% dplyr::summarize(n = n())
## `summarise()` has grouped output by 'depth'. You can override using the `.groups`
## argument.
## # A tibble: 5 × 3
## # Groups: depth [2]
## depth cluster n
## <fct> <fct> <int>
## 1 Shallow Mc_1 45
## 2 Shallow Admixed 8
## 3 Mesophotic Mc_Deep 14
## 4 Mesophotic Mc_1 57
## 5 Mesophotic Admixed 45
mcavPcaPlot12SA = ggplot() +
geom_hline(yintercept = 0, color = "gray90", size = 0.25) +
geom_vline(xintercept = 0, color = "gray90", size = 0.25) +
geom_point(data = mcavPcangsd, aes(x = PC1, y = PC2, fill = bank, shape = depth, color = bank), stroke = 0, size = 3, alpha = 0.5, show.legend = FALSE) +
geom_point(data = mcavPcangsd, aes(x = mean.x, y = mean.y, fill = bank, shape = depth), color = "black", size = 3.75, alpha = 1, stroke = 0.25) +
scale_shape_manual(values = c(21, 23), name = "Depth Zone") +
scale_fill_manual(values = gomPal, name = "Site") +
scale_color_manual(values = gomPal, name = "Site") +
labs(x = paste0("PC 1 (", format(round(mcavPcaVar[1], 1), nsmall = 1)," %)")) +
annotate(geom = "text", x = -0.1275, y = 0.027, label = paste0("PC 2 (", format(round(mcavPcaVar[2], 1), nsmall = 1), " %)"), size = 3.65, angle = 90) +
guides(shape = guide_legend(override.aes = list(size = 2, stroke = 0.25, alpha = NA), order = 2, ncol = 1), fill = guide_legend(override.aes = list(shape = 22, size = 2, fill = gomPal[c(1,2,3,4,5)], alpha = NA), order = 1, ncol = 1)) +
coord_cartesian(xlim = c(-0.1, 0.225), clip = "off") +
theme_bw()
mcavPcaPlot12S = mcavPcaPlot12SA +
pcaTheme +
theme(legend.position = c(0.87, 0.76),
axis.title.y = element_blank())
mcavPcaPlot12S
mcavPcaPlot12LA = ggplot() +
geom_hline(yintercept = 0, color = "gray90", size = 0.5) +
geom_vline(xintercept = 0, color = "gray90", size = 0.5) +
geom_point(data = mcavPcangsd, aes(x = PC1, y = PC2, fill = cluster, shape = depth), color = "black", size = 3, alpha = 1, show.legend = TRUE) +
scale_shape_manual(values = c(21, 23), name = "Depth Zone") +
scale_fill_manual(values = mcavKColPal[c(1, 2, 7)], name = "Lineage") +
labs(x = paste0("PC 1 (", format(round(mcavPcaVar[1], 1), nsmall = 1)," %)"), y = paste0("PC 2 (", format(round(mcavPcaVar[2], 1), nsmall = 1), " %)")) +
guides(shape = "none", fill = guide_legend(override.aes = list(shape = 22, size = 2, fill = mcavKColPal[c(1, 2, 7)], alpha = NA), order = 1, ncol = 1))+
coord_cartesian(xlim = c(-0.1, 0.225), clip = "off") +
theme_bw()
mcavPcaPlot12L = mcavPcaPlot12LA +
pcaTheme +
theme(legend.position = c(0.9,0.86))
mcavPcaPlot12L
mcavPcaPlots = mcavPcaPlot12S + mcavPcaPlot12L +
plot_annotation(tag_levels = 'A') &
theme(plot.tag = element_text(size = 18),
panel.background = element_rect(fill = "white"),
legend.spacing = unit(-5, "pt"),
legend.key = element_blank(),
legend.background = element_blank())
mcavPcaPlots
Prepare admixture outputs for plotting
mcavAdmix = mcavPcangsd %>%dplyr::select(-PC1, -PC2, -PC3, -PC4, -cluster, -depthm, -mean.x, -mean.y)
mcavAdmix$bank = factor(mcavAdmix$bank)
mcavAdmix = arrange(mcavAdmix, bank, depth, -cluster1, -cluster2)
mcavPopCounts = mcavAdmix %>% group_by(bank, depth) %>% summarise(n = n())
## `summarise()` has grouped output by 'bank'. You can override using the `.groups`
## argument.
mcavPopCounts
## # A tibble: 7 × 3
## # Groups: bank [5]
## bank depth n
## <fct> <fct> <int>
## 1 WFGB Shallow 27
## 2 WFGB Mesophotic 27
## 3 EFGB Shallow 26
## 4 EFGB Mesophotic 21
## 5 Bright Mesophotic 28
## 6 Geyer Mesophotic 12
## 7 McGrail Mesophotic 28
mcavPopCountList = reshape2::melt(lapply(mcavPopCounts$n,function(x){c(1:x)}))
mcavAdmix$ord = mcavPopCountList$value
mcavAdmixMelt = melt(mcavAdmix, id.vars=c("sample", "bank", "depth", "sitedepth", "ord"), variable.name="Ancestry", value.name="Fraction")
mcavAdmixMelt$Ancestry = factor(mcavAdmixMelt$Ancestry)
mcavAdmixMelt$Ancestry = factor(mcavAdmixMelt$Ancestry, levels = rev(levels(mcavAdmixMelt$Ancestry)))
mcavPopAnno = data.frame(x1 = c(0.5, 0.5, 0.5, 0.5, 0.5), x2 = c(28.5, 28.5, 28.5, 28.5, 28.5),
y1 = -0.1, y2 = -0.1, sample = NA, Ancestry = NA, depth = "Mesophotic",
ord = NA, Fraction = NA,
bank = c("WFGB", "EFGB", "Bright", "Geyer", "McGrail"))
mcavPopAnno$bank = factor(mcavPopAnno$bank)
mcavPopAnno$bank = factor(mcavPopAnno$bank, levels = levels(mcavPopAnno$bank)[c(5, 2, 1, 3, 4)])
mcavAdmixPlotA = ggplot(data = mcavAdmixMelt, aes(x = ord, y = Fraction, fill = Ancestry, order = sample)) +
geom_segment(data = mcavPopAnno, aes(x = x1, xend = x2, y = -.1, yend = -.1, color = bank), size = 7) +
geom_bar(stat = "identity", position = "fill", width = 1, colour = "grey25", size = 0.2) +
facet_grid(factor(depth) ~ bank, switch = "both", scales = "free_x") +
geom_text(data = (mcavAdmixMelt %>% filter(depth == "Mesophotic", bank %in% c("WFGB", "EFGB", "Bright", "Geyer", "McGrail"), sample %in% c(
"MGM116", "MGM015", "MGM001", "MGM144", "MGM062"), Ancestry == "cluster1")), x = 15.5, y = -.1, aes(label = bank), size = 4, color = "black") +
scale_fill_manual(values = mcavKColPal[1:2]) +
scale_color_manual(values = gomPal) +
scale_x_discrete(expand = c(0.005, 0.005)) +
scale_y_continuous(expand = c(0.001, 0.001)) +
coord_cartesian(ylim = c(0.0, 1.0), clip = "off") +
theme_bw()
mcavAdmixPlot = mcavAdmixPlotA +
theme_bw()+
admixTheme
mcavAdmixPlot
ticks = data.frame(y = c(seq(5,75,5)), x = 0.15, xend = 0.08)
leveneTest(lm(depthm ~ cluster, data = mcavPcangsd))
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 2 20.177 0.00000001432 ***
## 166
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mcavDepthAov = welch_anova_test(depthm ~ cluster, data = subset(mcavPcangsd, subset = mcavPcangsd$cluster!="Admixed"))
mcavDF = mcavDepthAov$statistic[[1]]
mcavDepthPH = games_howell_test(depthm ~ cluster, data = mcavPcangsd, conf.level = 0.95) %>% as.data.frame()
mcavDepthComps = paste(mcavDepthPH$group1,"-",mcavDepthPH$group2, sep ="")
# mcavComps = c("D1-D2", "D1-D3", "D1-D4", "D1-S1", "D1-S2", "D1-A",)
mcavDepthPs = mcavDepthPH$p.adj
mcDepthMultComp = mcavDepthPs < .05
names(mcDepthMultComp) = mcavDepthComps
mcDepthLet = multcompLetters(mcDepthMultComp)
mcDepthLetters = mcDepthLet$Letters %>% as.data.frame()
mcavDepthLetters = data.frame(x = factor(rownames(mcDepthLetters)), y = 5, grp = mcDepthLetters$.)
mcavLineageViolinA = ggplot(data = mcavPcangsd, aes(x = cluster, y = depthm)) +
annotate(geom = "rect", xmin = -Inf, xmax = Inf, ymin = 30, ymax = Inf, fill = "black", alpha = 0.10, color = NA) +
geom_violin(aes(fill = cluster),adjust = 1, linewidth = 0, color = "black", alpha = 0.75, width = 0.9, trim = F, scale = "area") +
geom_beeswarm(aes(fill = cluster), shape = 21, size = 2, cex = 1.5, alpha = 0.5) +
geom_violin(adjust = 1, linewidth = 0.4, color = "black", alpha = 1, width = 0.9, trim = F, fill = NA, scale = "area") +
geom_boxplot(width = 0.1, color = "black", fill = "white", outlier.colour = NA, linewidth = 0.6, alpha = 0.5) +
annotate(geom = "text", x = 2.8, y = 75, label = bquote(italic("F")~" = "~.(mcavDF)*"; "~italic("p")~" < 0.001"), size = 3) +
geom_segment(data = ticks, aes(x = x, xend = xend, y = y), color = "black") +
annotate(geom = "text", x = mcavDepthLetters$x, y = mcavDepthLetters$y, label = mcavDepthLetters$grp) +
scale_fill_discrete(type = mcavKColPal[c(1, 2, 7)], name = "Lineage") +
scale_color_discrete(type = mcavKColPal[c(1, 2, 7)], name = "Lineage") +
xlab("Lineage") +
ylab("Depth (m)") +
scale_y_reverse(breaks = seq(5, 90, 5)) +
coord_cartesian(xlim = c(.75, 3.25), clip = "off") +
theme_bw()+
theme(axis.ticks.y = element_blank(),
panel.background = element_rect(linewidth = 0))
mcavLineageViolin = mcavLineageViolinA + violinTheme
mcavLineageViolin
mcavData = read.csv("../data/nwgomMetaData.csv") %>% dplyr::filter(species == "M. cavernosa") %>% dplyr::filter(!sample %in% c("MGM072.1", "MGM072.3", "MGM013.2", "MGM013.3", "MGM008.1", "MGM008.2", "MGM010", "MGM024.1", "MGM004", "MGM002.2", "MGM002.3", "MGM038.2", "MGM038.3"))
mcavHetData = read.delim("../data/mcav/mcavHet.out", header = FALSE, sep = " ") %>% mutate(sample = mcavData$sample) %>% rename("Het" = "V2") %>% left_join(mcavPcangsd) %>% select(sample, Het, cluster)
## Joining with `by = join_by(sample)`
mcavHet = mcavData %>% left_join(mcavHetData)
## Joining with `by = join_by(sample)`
mcavHetAOV = welch_anova_test(mcavHet, Het ~ cluster)
mcavHetAOV$p
## [1] 0.0000544
mcavHetPH = games_howell_test(mcavHet, Het ~ cluster)
mcavHetPH$p.adj
## [1] 0.98000000 0.01500000 0.00000699
mcavComps = paste(mcavHetPH$group1,"-",mcavHetPH$group2, sep ="")
mcavPs = mcavHetPH$p.adj
mcMultComp = mcavPs < .05
names(mcMultComp) = mcavComps
mcLet = multcompLetters(mcMultComp)
mcLetters = mcLet$Letters %>% as.data.frame()
mcavHetLetters = data.frame(x = factor(rownames(mcLetters)), y = 0.0025, grp = mcLetters$.)
mcavFST = read.delim("../data/mcav/mcavKFst.out")
mcavHetPlot = ggplot(data = mcavHet, aes(x = cluster, y = Het)) +
geom_violin(aes(fill = cluster, group = cluster), adjust = 1, linewidth = 0.4, color = "black", alpha = 1, trim = FALSE, scale = "area", width = 1) +
geom_beeswarm(aes(fill = cluster, group = cluster),shape = 21, cex = 0.5) +
geom_boxplot(aes(fill = cluster, group = cluster), width = 0.2, color = "black", fill = "white", outlier.colour = NA, linewidth = 0.4, alpha = 0.5) + xlab("Lineage") +
annotate(geom = "text", x = 2.85, y = 0, label = bquote(italic("F")~" = "~.(mcavHetAOV$statistic)*"; "~italic("p")~" < 0.001"), size = 3) +
geom_text(data = mcavHetLetters, aes(x = x, y = y, label = grp), size = 4) +
geom_segment(data = mcavFST, aes(y = 0.0022, x = pop1, xend = pop2)) +
annotate(geom = "text", x = 1.5, y = 0.0023, label = bquote(italic("F")["ST"]~" = "~.(round(mcavFST$weightedFst, 3)))) +
scale_fill_discrete(type = mcavKColPal[c(1,2,7)], name = "Lineage") +
xlab("Lineage") +
ylab("Heterozygosity") +
scale_y_continuous(breaks = seq(0, 0.0025, 0.0005)) +
coord_cartesian(expand = TRUE, xlim = c(0.85, 3.15), ylim = c(0, 0.0025)) +
theme_bw() +
violinTheme
mcavHetPlot
mcavNpgList = list(read_tsv("../data/mcav/Mc_Deep.thetas.idx.pestPG") %>% mutate(lineage = "Mc_Deep"),
read_tsv("../data/mcav/Mc_1.thetas.idx.pestPG") %>% mutate(lineage = "Mc_1"))
## Rows: 2654 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (2): #(indexStart,indexStop)(firstPos_withData,lastPos_withData)(WinStart,WinSt...
## dbl (12): WinCenter, tW, tP, tF, tH, tL, Tajima, fuf, fud, fayh, zeng, nSites
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 2654 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (2): #(indexStart,indexStop)(firstPos_withData,lastPos_withData)(WinStart,WinSt...
## dbl (12): WinCenter, tW, tP, tF, tH, tL, Tajima, fuf, fud, fayh, zeng, nSites
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
mcavPiAll = purrr::reduce(mcavNpgList, rbind) %>%
dplyr::group_by(lineage) %>%
filter(tP != 0) %>%
mutate(tPps = tP/nSites) %>%
dplyr::summarize(tPps = mean(tPps))
mcavPiAll$Ne = (mcavPiAll$tPps)/(4*2e-8)
mcavPiAll
## # A tibble: 2 × 3
## lineage tPps Ne
## <chr> <dbl> <dbl>
## 1 Mc_1 0.00355 44422.
## 2 Mc_Deep 0.00355 44348.
mcavPiAll$lineage = factor(mcavPiAll$lineage)
mcavPiAll$lineage = factor(mcavPiAll$lineage, levels = levels(mcavPiAll$lineage)[c(2,1)])
mcavNuclDivPlot = ggplot(mcavPiAll, aes(x = lineage, y = tPps, fill = lineage, group = lineage)) +
geom_bar(position = position_dodge2(preserve = "single"), stat = "identity", color = "black") +
scale_fill_manual(values = mcavKColPal) +
geom_text(position = position_dodge2(preserve = "single", width = 0.9), aes(y = tPps-.0015, label= scales::comma(round(Ne,0)), hjust = 0, fontface = "bold"), angle = 90, color = "black", ) +
labs(x = "Lineage", y = bquote("Nucleotide diversity ("*pi*")")) +
theme_bw() +
violinTheme +
theme(axis.text.x = ggtext::element_markdown())
mcavNuclDivPlot
mcavData = read.csv("../data/nwgomMetaData.csv") %>% dplyr::filter(species == "M. cavernosa") %>% dplyr::filter(!sample %in% c("MGM072.1", "MGM072.3", "MGM013.2", "MGM013.3", "MGM008.1", "MGM008.2", "MGM010", "MGM024.1", "MGM004", "MGM002.2", "MGM002.3", "MGM038.2", "MGM038.3")) %>% dplyr::select("sample" = sample, "site" = bank, "depth" = depthZone)
mcavZoox = read.delim("../data/mcav/mcavZooxReads", header = FALSE, check.names = FALSE)
head(mcavZoox)
## V1 V2
## 1 MGM001.trim.zoox.bt2.bam NA
## 2 chr1 15
## 3 chr2 32
## 4 chr3 12
## 5 chr4 29
## 6 chr5 113
# Reconstruct read mapping output into dataframe usable for analysis
mcavZoox$V2[is.na(mcavZoox$V2)] <- as.character(mcavZoox$V1[is.na(mcavZoox$V2)])
mcavZoox$V1 = gsub(pattern = "MGM*.", "chr", mcavZoox$V1)
mcavZoox$V2 = gsub(".trim.*", "", mcavZoox$V2)
mcavZoox = mcavZoox %>% filter(mcavZoox$V1 != "*")
mcavZooxLst = split(mcavZoox$V2, as.integer(gl(length(mcavZoox$V2), 20, length(mcavZoox$V2))))
mcavZooxMaps = NULL
for(i in mcavZooxLst){
mcavZooxMaps = rbind(mcavZooxMaps, data.frame(t(i)))
}
# remove tech reps
mcavZooxMaps = mcavZooxMaps %>% dplyr::filter(!X1 %in% c("MGM072.1", "MGM072.3", "MGM013.2", "MGM013.3", "MGM008.1", "MGM008.2", "MGM010", "MGM024.1", "MGM004", "MGM002.2", "MGM002.3", "MGM038.2", "MGM038.3"))
colnames(mcavZooxMaps) = c("sample",mcavZoox$V1[c(2:20)])
# convert characters to numeric
str(mcavZooxMaps)
## 'data.frame': 169 obs. of 20 variables:
## $ sample: chr "MGM001" "MGM002.1" "MGM003" "MGM005" ...
## $ chr1 : chr "15" "88" "38" "108" ...
## $ chr2 : chr "32" "98" "69" "125" ...
## $ chr3 : chr "12" "83" "46" "79" ...
## $ chr4 : chr "29" "94" "36" "101" ...
## $ chr5 : chr "113" "133" "44" "148" ...
## $ chr6 : chr "51" "108" "44" "99" ...
## $ chr7 : chr "24" "31" "41" "72" ...
## $ chr8 : chr "77" "87" "87" "141" ...
## $ chr9 : chr "55" "74" "66" "141" ...
## $ chr10 : chr "718" "2764" "1330" "6699" ...
## $ chr11 : chr "881" "3522" "1501" "8839" ...
## $ chr12 : chr "1065" "4409" "1835" "9631" ...
## $ chr13 : chr "951" "3580" "1579" "8742" ...
## $ chr14 : chr "1038" "3669" "1738" "8645" ...
## $ chr15 : chr "238" "642" "382" "1639" ...
## $ chr16 : chr "26" "60" "96" "238" ...
## $ chr17 : chr "29" "69" "40" "81" ...
## $ chr18 : chr "13" "28" "27" "90" ...
## $ chr19 : chr "5" "4" "6" "21" ...
for(i in c(2:20)){
mcavZooxMaps[,i] = as.numeric(mcavZooxMaps[,i])
}
str(mcavZooxMaps)
## 'data.frame': 169 obs. of 20 variables:
## $ sample: chr "MGM001" "MGM002.1" "MGM003" "MGM005" ...
## $ chr1 : num 15 88 38 108 56 88 34 22 56 11 ...
## $ chr2 : num 32 98 69 125 69 41 36 34 65 12 ...
## $ chr3 : num 12 83 46 79 44 33 28 21 72 15 ...
## $ chr4 : num 29 94 36 101 66 45 30 26 65 13 ...
## $ chr5 : num 113 133 44 148 122 116 128 3 9 3 ...
## $ chr6 : num 51 108 44 99 72 72 25 9 8 5 ...
## $ chr7 : num 24 31 41 72 36 41 29 5 1 3 ...
## $ chr8 : num 77 87 87 141 82 58 52 9 13 6 ...
## $ chr9 : num 55 74 66 141 100 67 48 15 14 17 ...
## $ chr10 : num 718 2764 1330 6699 4004 ...
## $ chr11 : num 881 3522 1501 8839 3526 ...
## $ chr12 : num 1065 4409 1835 9631 4135 ...
## $ chr13 : num 951 3580 1579 8742 3537 ...
## $ chr14 : num 1038 3669 1738 8645 3452 ...
## $ chr15 : num 238 642 382 1639 726 ...
## $ chr16 : num 26 60 96 238 331 68 10 6 13 15 ...
## $ chr17 : num 29 69 40 81 144 64 9 2 9 8 ...
## $ chr18 : num 13 28 27 90 231 31 5 7 11 12 ...
## $ chr19 : num 5 4 6 21 29 2 1 2 1 2 ...
# collapse fake chromosomes into representative genera
mcavZooxMaps$Symbiodinium = rowSums(mcavZooxMaps[2:6])
mcavZooxMaps$Breviolum = rowSums(mcavZooxMaps[7:10])
mcavZooxMaps$Cladocopium = rowSums(mcavZooxMaps[11:16])
mcavZooxMaps$Durusdinium = rowSums(mcavZooxMaps[17:20])
# keep genera totals and turn into proportions for barplot
mcavZooxMaps = mcavZooxMaps[,c(1, 21:24)]
mcavZooxProp = mcavZooxMaps
mcavZooxProp$sum = apply(mcavZooxProp[, c(2:length(mcavZooxProp[1,]))], 1, function(x) {
sum(x, na.rm = T)
})
mcavZooxProp = cbind(mcavZooxProp$sample, (mcavZooxProp[, c(2:(ncol(mcavZooxProp)-1))]
/ mcavZooxProp$sum))
colnames(mcavZooxProp)[1] = "sample"
head(mcavZooxProp)
## sample Symbiodinium Breviolum Cladocopium Durusdinium
## 1 MGM001 0.037416232 0.038533135 0.9104617 0.013588980
## 2 MGM002.1 0.025379931 0.015350765 0.9510311 0.008238244
## 3 MGM003 0.025874514 0.026429761 0.9289284 0.018767351
## 4 MGM005 0.012292119 0.009925721 0.9683604 0.009421766
## 5 MGM006 0.017194875 0.013967826 0.9334361 0.035401214
## 6 MGM007 0.006753366 0.004976165 0.9848206 0.003449862
# Check that all samples total to 1
apply(mcavZooxProp[, c(2:(ncol(mcavZooxProp)))], 1, function(x) {
sum(x, na.rm = T)
})
## [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [42] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [83] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [124] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [165] 1 1 1 1 1
# add sample metadata to proportions
mcavSym = mcavData %>% left_join(mcavZooxProp)
## Joining with `by = join_by(sample)`
mcavAdmixOrd = mcavAdmix %>% dplyr::select(sample, ord)
mcavPcangsdITS = mcavPcangsd %>% dplyr::select(sample, cluster)
mcavSym = mcavSym %>% left_join(mcavAdmixOrd) %>% relocate(ord,.after = depth) %>% left_join(mcavPcangsdITS) %>% relocate(cluster, .after = depth)
## Joining with `by = join_by(sample)`
## Joining with `by = join_by(sample)`
mcavZooxPermData = mcavData %>% left_join(mcavZooxMaps) %>% left_join(mcavPcangsdITS) %>% relocate(cluster, .after = depth)
## Joining with `by = join_by(sample)`
## Joining with `by = join_by(sample)`
mcavZooxPermData$depth = factor(mcavZooxPermData$depth)
mcavZooxPermData$depth = factor(mcavZooxPermData$depth, levels = levels(mcavZooxPermData$depth)[c(2, 1)])
mcavZooxPermData$site = factor(mcavZooxPermData$site)
mcavZooxPermData$site = factor(mcavZooxPermData$site, levels = levels(mcavZooxPermData$site)[c(5, 2, 1, 3, 4)])
head(mcavZooxPermData)
## sample site depth cluster Symbiodinium Breviolum Cladocopium Durusdinium
## 1 MGM001 Geyer Mesophotic Admixed 201 207 4891 73
## 2 MGM002.1 Geyer Mesophotic Mc_1 496 300 18586 161
## 3 MGM003 Geyer Mesophotic Mc_1 233 238 8365 169
## 4 MGM005 Geyer Mesophotic Mc_1 561 453 44195 430
## 5 MGM006 Geyer Mesophotic Admixed 357 290 19380 735
## 6 MGM007 Geyer Mesophotic Mc_1 323 238 47102 165
set.seed(694)
mcavSymPerm = adonis2(decostand(mcavZooxPermData[ c(5:((ncol(mcavZooxPermData))))], "normalize") ~ depth * site, data = mcavZooxPermData, permutations = 9999, method = "bray", sqrt.dist = TRUE, by = "terms")
mcavSymPerm
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = decostand(mcavZooxPermData[c(5:((ncol(mcavZooxPermData))))], "normalize") ~ depth * site, data = mcavZooxPermData, permutations = 9999, method = "bray", sqrt.dist = TRUE, by = "terms")
## Df SumOfSqs R2 F Pr(>F)
## depth 1 0.07877 0.02700 4.9029 0.0022 **
## site 4 0.21184 0.07261 3.2962 0.0003 ***
## depth:site 1 0.02395 0.00821 1.4906 0.1619
## Residual 162 2.60281 0.89218
## Total 168 2.91737 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mcavSymPermPW = pairwise.adonis2(decostand(mcavZooxPermData[ c(5:((ncol(mcavZooxPermData))))], "normalize") ~ site, data = mcavZooxPermData, nperm = 9999, method = "bray", sqrt.dist = TRUE, by = "terms")
mcavSymPermPW
## $parent_call
## [1] "decostand(mcavZooxPermData[c(5:((ncol(mcavZooxPermData))))], \"normalize\") ~ site , strata = Null , permutations 9999"
##
## $Geyer_vs_EFGB
## Df SumOfSqs R2 F Pr(>F)
## site 1 0.08318 0.06036 3.6616 0.018 *
## Residual 57 1.29483 0.93964
## Total 58 1.37801 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $Geyer_vs_WFGB
## Df SumOfSqs R2 F Pr(>F)
## site 1 0.08459 0.05484 3.7136 0.008 **
## Residual 64 1.45780 0.94516
## Total 65 1.54239 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $Geyer_vs_Bright
## Df SumOfSqs R2 F Pr(>F)
## site 1 0.11441 0.13657 6.0106 0.001 ***
## Residual 38 0.72332 0.86343
## Total 39 0.83773 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $Geyer_vs_McGrail
## Df SumOfSqs R2 F Pr(>F)
## site 1 0.12069 0.14891 6.6486 0.001 ***
## Residual 38 0.68978 0.85109
## Total 39 0.81047 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $EFGB_vs_WFGB
## Df SumOfSqs R2 F Pr(>F)
## site 1 0.01194 0.00657 0.6542 0.625
## Residual 99 1.80681 0.99343
## Total 100 1.81875 1.00000
##
## $EFGB_vs_Bright
## Df SumOfSqs R2 F Pr(>F)
## site 1 0.02521 0.02297 1.7163 0.112
## Residual 73 1.07232 0.97703
## Total 74 1.09753 1.00000
##
## $EFGB_vs_McGrail
## Df SumOfSqs R2 F Pr(>F)
## site 1 0.02782 0.02609 1.9553 0.073 .
## Residual 73 1.03879 0.97391
## Total 74 1.06661 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $WFGB_vs_Bright
## Df SumOfSqs R2 F Pr(>F)
## site 1 0.01821 0.01453 1.1795 0.266
## Residual 80 1.23529 0.98547
## Total 81 1.25350 1.00000
##
## $WFGB_vs_McGrail
## Df SumOfSqs R2 F Pr(>F)
## site 1 0.02256 0.01843 1.5019 0.171
## Residual 80 1.20176 0.98157
## Total 81 1.22432 1.00000
##
## $Bright_vs_McGrail
## Df SumOfSqs R2 F Pr(>F)
## site 1 0.00667 0.01407 0.7707 0.566
## Residual 54 0.46727 0.98593
## Total 55 0.47394 1.00000
##
## attr(,"class")
## [1] "pwadstrata" "list"
M. cavernosa lineage vs Symbiodiniaceae B-C distance
mcavZooxDist = mcavZooxPermData[ c(5:ncol(mcavZooxPermData))] %>% decostand("normalize") %>% vegdist(method = "bray")
mcavZooxPCoA = cmdscale(mcavZooxDist, eig = TRUE, x.ret = TRUE)
mcavAdmixDist = mcavAdmixK2[c(5:ncol(mcavAdmixK2))] %>% vegdist(method = "euclidean")
mcavAdmixPCoA = cmdscale(mcavAdmixDist, eig = TRUE, x.ret = TRUE)
set.seed(981)
mcavProcrustes = protest(X = mcavAdmixPCoA, Y = mcavZooxPCoA , permutations = 9999)
mcavProcrustes
##
## Call:
## protest(X = mcavAdmixPCoA, Y = mcavZooxPCoA, permutations = 9999)
##
## Procrustes Sum of Squares (m12 squared): 0.9984
## Correlation in a symmetric Procrustes rotation: 0.04
## Significance: 0.6897
##
## Permutation: free
## Number of permutations: 9999
mcavSym$depth = factor(mcavSym$depth)
mcavSym$depth = factor(mcavSym$depth, levels = levels(mcavSym$depth)[c(2, 1)])
mcavSym$site = factor(mcavSym$site)
mcavSym$site = factor(mcavSym$site, levels = levels(mcavSym$site)[c(5, 2, 1, 3, 4)])
#turn into melted dataframe with otustack() and remove "summ" rows
mcavGssSym = otuStack(mcavSym, count.columns = c(6:length(mcavSym[1, ])),
condition.columns = c(1:5)) %>% filter(otu != "summ") %>% droplevels()
#check that levels are correct/ordered
levels(mcavGssSym$otu)
## [1] "Symbiodinium" "Breviolum" "Cladocopium" "Durusdinium"
levels(mcavGssSym$depth)
## [1] "Shallow" "Mesophotic"
levels(mcavGssSym$site)
## [1] "WFGB" "EFGB" "Bright" "Geyer" "McGrail"
mcavZooxAnno = data.frame(x1 = c(0.5, 0.5, 0.5, 0.5, 0.5), x2 = c(28.5, 28.5, 28.5, 28.5, 28.5),
y1 = -0.22, y2 = -0.22, site = c("WFGB", "EFGB", "Bright", "Geyer", "McGrail"))
mcavZooxAnno$site = factor(mcavZooxAnno$site)
mcavZooxAnno$site = factor(mcavZooxAnno$site, levels = levels(mcavZooxAnno$site)[c(5, 2, 1, 3, 4)])
mcavGssSymPlot = mcavGssSym %>% left_join(mcavZooxAnno, by = "site") %>% left_join(mcavPcangsdITS)
## Joining with `by = join_by(sample, cluster)`
mcavGssSymPlot$ord = as.numeric(mcavGssSymPlot$ord)
mcavZooxPlotA = ggplot(data = mcavGssSymPlot, aes(x = ord, y = count, fill = otu, order = ord)) +
geom_point(aes(x=1, y=0.5, fill = otu), shape = 22, size = 0) +
geom_bar(stat = "identity", position = "stack", colour = "grey25", width = 1, size = 0.2, show.legend = FALSE) +
xlab("Population") +
scale_fill_manual(values = colPalZoox, name = "Symbiodiniaceae \ngenus") +
geom_segment(data = (mcavGssSymPlot %>% filter(depth == "Mesophotic")), aes(x = x1, xend = x2, y = y1, yend = y2, color = site), size = 7) +
scale_color_manual(values = gomPal, guide = "none") +
ggnewscale::new_scale_color() +
geom_segment(data = mcavGssSymPlot, aes(x = ord-0.5, xend = ord+0.5, color = cluster), y = -.07, yend = -.07, linewidth = 4) +
scale_color_manual(values = mcavKColPal[c(1, 2, 7)], name = "Lineage") +
coord_cartesian(ylim = c(0, 1), xlim = c(0.5, 28.5), clip = "off") +
scale_x_discrete(expand = c(0.005, 0.005)) +
scale_y_continuous(expand = c(0.001, 0.001)) +
facet_grid(factor(depth) ~ site, drop = TRUE, scales = "free", switch = "both", space = "free") +
geom_text(data = subset(mcavGssSymPlot, subset = otu == "Symbiodinium") %>% filter(sample %in% c("MGM116", "MGM015", "MGM001", "MGM144", "MGM062")), x = 15.5, y = -.205, aes(label = site), size = 4.4, color = "#000000") +
guides(fill = guide_legend(override.aes = list(size = 4), ncol = 1, order = 1), color = guide_legend(override.aes = list(size = .1), ncol = 1)) +
theme_bw()
mcavZooxPlot = mcavZooxPlotA + zooxTheme
mcavZooxPlot
mcavPlot1 = mcavStructure + inset_element(mcavImg, -.1,.7,.3,1, align_to = "full", ignore_tag = TRUE)
mcavPlot2 = (mcavPcaPlots) + plot_layout(guides = "collect")
mcavPlot3 = (mcavLineageViolin + theme(axis.text.y = element_text(margin = ggplot2::margin(r = -15, unit = "pt")))) + mcavHetPlot + mcavNuclDivPlot
mcavPlot4 = mcavAdmixPlot + mcavZooxPlot
# theme(axis.title.y = element_text(margin = ggplot2::margin(r = -20, unit = "pt"))
mcavPlots = mcavPlot1 + mcavPlot2 + mcavPlot3 + mcavPlot4+
plot_layout(heights = c(2,1,1,1)) +
plot_annotation(tag_levels = 'A') &
theme(plot.tag = element_text(size = 18),
legend.spacing = unit(-5, "pt"),
legend.key = element_blank(),
legend.background = element_blank())
ggsave("../figures/figure2.png", plot = mcavPlots, height = 14, width = 12, units = "in", dpi = 300)
ggsave("../figures/figure2.svg", plot = mcavPlots, height = 14, width = 12, units = "in", dpi = 300)
sintBams = read.csv("../data/nwgomMetaData.csv") %>% dplyr::filter(species == "S. intersepta") %>% dplyr::filter(!sample %in% c("SGM007", "SGM010", "SGM013", "SGM027.2", "SGM027.3", "SGM046.1", "SGM046.3", "SGM063", "SGM152.2", "SGM152.3", "SGM169", "SGM177", "SGM179", "SGM186.1", "SGM186.3", "SGM197.2", "SGM197.3", "SGM206.2", "SGM206.3"))
sintBams$sample <- gsub("\\.[1-3]*$", "", sintBams$sample)
sintBams = sintBams %>% dplyr::relocate(sample)
sintBams$bank = factor(sintBams$bank)
sintBams$bank = factor(sintBams$bank, levels = levels(sintBams$bank)[c(5, 2, 1, 3, 4)])
sintBams$depthZone = factor(sintBams$depthZone)
sintBams$depthZone = factor(sintBams$depthZone, levels = levels(sintBams$depthZone)[c(2, 1)])
sintMa = as.matrix(read.table("../data/sint/sintNoClones.ibsMat")) # reads in IBS matrix produced by ANGSD
dimnames(sintMa) = list(sintBams[,3],sintBams[,3])
## admixture K = 2
#-------------------------------------
sintK2ad = read.table("../data/sint/admix/sintK2.output") %>% dplyr::select(V6, V7)
sintK2ad %>% summarise(sum(V6),sum(V7))
## sum(V6) sum(V7)
## 1 103.7485 99.2515
sintAdmixK2 = sintBams %>%
dplyr::select(sample, bank, "depth" = depthZone, "depthm" = depthM) %>%
cbind(sintK2ad) %>%
dplyr::rename("Si1" = "V6", "Si2" = "V7")
sintAdmixK2_melt = melt(sintAdmixK2, id = c("sample", "bank", "depth", "depthm"))
## admixture K = 3
#-------------------------------------
sintK3ad = read.table("../data/sint/admix/sintK3.output") %>% dplyr::select(V6, V7, V8)
sintK3ad %>% summarise(sum(V6),sum(V7), sum(V8))
## sum(V6) sum(V7) sum(V8)
## 1 73.0318 70.4823 59.4852
sintAdmixK3 = sintBams %>%
dplyr::select(sample, bank, "depth" = depthZone, "depthm" = depthM) %>%
cbind(sintK3ad) %>%
dplyr::rename("Si1" = "V6", "Si2" = "V7", "Si3" = "V8") %>%
dplyr::select(order(colnames(.)))
sintAdmixK3_melt = melt(sintAdmixK3, id = c("sample", "bank", "depth", "depthm"))
## ngsAdmix K = 4
#-------------------------------------
sintK4ad = read.table("../data/sint/admix/sintK4.output") %>% dplyr::select(V6, V7, V8, V9)
sintK4ad %>% summarise(sum(V6),sum(V7), sum(V8), sum(V9))
## sum(V6) sum(V7) sum(V8) sum(V9)
## 1 50.0275 48.5974 48.3334 56.0415
sintAdmixK4 = sintBams %>%
dplyr::select(sample, bank, "depth" = depthZone, "depthm" = depthM) %>%
cbind(sintK4ad) %>%
dplyr::rename("Si1" = "V6", "Si2" = "V7", "Si3" = "V8", "Si4" = "V9") %>%
dplyr::select(order(colnames(.)))
sintAdmixK4_melt = melt(sintAdmixK4, id = c("sample", "bank", "depth", "depthm"))
## admixture K = 5
#-------------------------------------
sintK5ad = read.table("../data/sint/admix/sintK5.output") %>% dplyr::select(V6, V7, V8, V9, V10)
sintK5ad %>% summarise(sum(V6),sum(V7), sum(V8), sum(V9), sum(V10))
## sum(V6) sum(V7) sum(V8) sum(V9) sum(V10)
## 1 43.6614 41.4828 40.1593 44.0573 33.6384
sintAdmixK5 = sintBams %>%
dplyr::select(sample, bank, "depth" = depthZone, "depthm" = depthM) %>%
cbind(sintK5ad) %>%
dplyr::rename("Si1" = "V6", "Si2" = "V7", "Si3" = "V8", "Si4" = "V9", "Si5" = "V10") #%>%
sintAdmixK5_melt = melt(sintAdmixK5, id = c("sample", "bank", "depth", "depthm"))
## admixture K = 6
#-------------------------------------
sintK6ad = read.table("../data/sint/admix/sintK6.output") %>% dplyr::select(V6, V7, V8, V9, V10,V11)
sintK6ad %>% summarise(sum(V6),sum(V7), sum(V8), sum(V9), sum(V10), sum(V11))
## sum(V6) sum(V7) sum(V8) sum(V9) sum(V10) sum(V11)
## 1 30.0317 36.7717 36.8505 36.0188 32.8497 30.4772
sintAdmixK6 = sintBams %>%
dplyr::select(sample, bank, "depth" = depthZone, "depthm" = depthM) %>%
cbind(sintK6ad) %>%
dplyr::rename("Si1" = "V6", "Si2" = "V7", "Si3" = "V8", "Si4" = "V9", "Si5" = "V10", "Si6" = "V11") #%>%
sintAdmixK6_melt = melt(sintAdmixK6, id = c("sample", "bank", "depth", "depthm"))
## construct figure
#-------------------------------------
# ggtree(tr) + geom_nodelab(aes(label = node), hjust = -0.5)
{
sintTr = hclust(dist(sintMa),"ave")
sintP1 = ggtree(sintTr, layout = "rectangular", size = 0.35)
sintP2 = facet_plot(sintP1, panel = "location", data=sintAdmixK2, geom = geom_tile, mapping = aes(x = 1, fill = bank), color = "grey25", size = 0.1) +
scale_fill_manual("Site", values = gomPal, guide = guide_legend(order = 1)) +
new_scale_fill()
sintP3 = facet_plot(sintP2, panel = "depth zone", data=sintAdmixK2, geom = geom_tile, mapping = aes(x = 1, fill = depth), color = "grey25", size = 0.1) +
scale_fill_manual("Depth zone", values = c("#A7E1BCFF", "#414083FF"), guide = guide_legend(order = 2)) +
new_scale_fill()
sintP4 = facet_plot(sintP3, panel = "depth", data=sintAdmixK2, geom = geom_tile, mapping = aes(x = 1, fill = depthm), color = "grey25", size = 0.1) +
scale_fill_viridis_c("Depth (m)", option = "mako", trans = "reverse", limits = c(60, 10)) +
new_scale_fill()
sintP5 = facet_plot(sintP4, panel="K = 2", data=sintAdmixK2_melt, geom=geom_barh, width = 1.0, mapping = aes(x = value, fill = variable),
stat='identity', color = "grey25", size = 0.1, show.legend = FALSE) +
scale_fill_manual("Lineage",values = sintKColPal[c(1,5,2,3,4,6)])
sintP6 = facet_plot( sintP5, panel="K = 3", data=sintAdmixK3_melt, geom=geom_barh, width = 1.0, mapping = aes(x = value, fill = variable),
stat='identity', color = "grey25", size = 0.1, show.legend = FALSE)
sintP7 = facet_plot(sintP6, panel="K = 4", data=sintAdmixK4_melt, geom=geom_barh, width = 1.0, mapping = aes(x = value, fill = variable),
stat='identity', color = "grey25", size = 0.1, show.legend = FALSE)
sintP8 = facet_plot(sintP7, panel="K = 5", data=sintAdmixK5_melt, geom=geom_barh, width = 1.0, mapping = aes(x = value, fill = variable),
stat='identity', color = "grey25", size = 0.1, show.legend = FALSE)
sintP9 = facet_plot(sintP8, panel="K = 6", data=sintAdmixK6_melt, geom=geom_barh, width = 1.0, mapping = aes(x = value, fill = variable),
stat='identity', color = "grey25", size = 0.1, show.legend = FALSE) +
theme_tree(strip.text = element_blank(),
panel.spacing = unit(.1, "line"))
}
sintImg = image_read("../figures/icons/sint.png") %>% image_ggplot()
sintStructure = facet_widths(sintP9, widths = c(0.5, 0.025, 0.025, 0.025, 0.2, 0.1, 0.1, 0.1, 0.1))
sintStructure
sintData = read.csv("../data/nwgomMetaData.csv") %>% dplyr::filter(species == "S. intersepta") %>% dplyr::filter(!sample %in% c("SGM007", "SGM010", "SGM013", "SGM027.2", "SGM027.3", "SGM046.1", "SGM046.3", "SGM063", "SGM152.2", "SGM152.3", "SGM169", "SGM177", "SGM179", "SGM186.1", "SGM186.3", "SGM197.2", "SGM197.3", "SGM206.2", "SGM206.3"))
sintData$depthZone = factor(sintData$depthZone)
sintData$depthZone = factor(sintData$depthZone, levels(sintData$depthZone)[c(2,1)])
sintData$bank = factor(sintData$bank)
sintData$bank = factor(sintData$bank,levels(sintData$bank)[c(5, 2, 1, 4, 3)])
sintPcadmix = read.table("../data/sint/admix/sintK2.output")
sintPcadmix %>% summarise(sum(V6),sum(V7))
## sum(V6) sum(V7)
## 1 103.7485 99.2515
sintPcadmix = sintData %>% cbind(sintPcadmix) %>% rename("cluster1" = "V6", "cluster2" = "V7") %>%dplyr::select(order(colnames(.)))
sintPcadmixClust = sintPcadmix %>% mutate(cluster = ifelse(cluster1 < 0.75 & cluster2 < 0.75, "NA", ifelse(cluster1 >=0.75, 1, ifelse(cluster2 >= 0.75, 2, 0))))
sintPcadmix = sintPcadmix %>% mutate(sintPcadmixClust)
sintPcadmix$cluster = as.factor(sintPcadmix$cluster)
levels(sintPcadmix$cluster) = c("Si_1", "Si_2", "Admixed")
sintSiteLineages = sintPcadmix %>% dplyr::select(depthZone, cluster) %>%
group_by(depthZone) %>% count(cluster) %>% mutate(Freq = n/sum(n)) %>% apply(2, function(x) gsub("\\s+", "", x)) %>% as.data.frame()
sintSiteLineages
## depthZone cluster n Freq
## 1 Shallow Si_1 6 0.10344828
## 2 Shallow Si_2 11 0.18965517
## 3 Shallow Admixed 41 0.70689655
## 4 Mesophotic Si_1 19 0.13103448
## 5 Mesophotic Si_2 12 0.08275862
## 6 Mesophotic Admixed 114 0.78620690
sintCov = read.table("../data/sint/sintPcangsd.cov") %>% as.matrix()
sintPcAdmix = read.table("../data/sint/admix/sintK2.output") %>%dplyr::select(V6, V7)
sintPcAdmix %>% summarise(sum(V6),sum(V7))
## sum(V6) sum(V7)
## 1 103.7485 99.2515
sintPcAdmix = sintPcAdmix %>% rename("cluster1" = "V6", "cluster2" = "V7") %>% dplyr::select(order(colnames(.)))
sintPcaEig = eigen(sintCov)
sintPcaVar = sintPcaEig$values/sum(sintPcaEig$values)*100
head(sintPcaVar)
## [1] 0.7628354 0.6680628 0.6538201 0.6487587 0.6439072 0.6427670
sintPcangsd = read.csv("../data/nwgomMetaData.csv") %>% dplyr::filter(species == "S. intersepta") %>% dplyr::filter(!sample %in% c("SGM007", "SGM010", "SGM013", "SGM027.2", "SGM027.3", "SGM046.1", "SGM046.3", "SGM063", "SGM152.2", "SGM152.3", "SGM169", "SGM177", "SGM179", "SGM186.1", "SGM186.3", "SGM197.2", "SGM197.3", "SGM206.2", "SGM206.3")) %>% dplyr::select("sample" = sample, "bank" = bank, "depth" = depthZone, "depthm" = depthM)
sintPcangsd$sitedepth = as.factor(paste(sintPcangsd$bank, sintPcangsd$depth, sep = " "))
sintPcangsd$sitedepth = factor(sintPcangsd$sitedepth, levels(sintPcangsd$sitedepth)[c(7, 6, 3, 2, 1, 4, 5)])
sintPcangsd$bank = factor(sintPcangsd$bank)
sintPcangsd$bank = factor(sintPcangsd$bank, levels(sintPcangsd$bank)[c(5, 2, 1, 3, 4)])
sintPcangsd$depth = factor(sintPcangsd$depth)
sintPcangsd$depth = factor(sintPcangsd$depth, levels(sintPcangsd$depth)[c(2, 1)])
sintPcangsd$PC1 = sintPcaEig$vectors[,1]
sintPcangsd$PC2 = sintPcaEig$vectors[,2]
sintPcangsd$PC3 = sintPcaEig$vectors[,3]
sintPcangsd$PC4 = sintPcaEig$vectors[,4]
sintPcangsdClust = sintPcAdmix %>% mutate(cluster = ifelse(cluster1 < 0.75 & cluster2 < 0.75, "NA", ifelse(cluster1 >=0.75, 1, ifelse(cluster2 >= 0.75, 2, 0))))
# pcangsdClust$clusterX = as.vector(d_clust$classification)
sintPcangsd = sintPcangsd %>% mutate(sintPcangsdClust)
sintPcangsd$cluster = as.factor(sintPcangsd$cluster)
levels(sintPcangsd$cluster) = c("Si_1", "Si_2", "Admixed")
sintBamsClusters = sintPcangsd %>% dplyr::select(sample, cluster) %>% dplyr::arrange(sample)
sintBamssamples = read.delim("../data/sint/bamsNoClones", header = FALSE)
sintBamsClusters$sample = sintBamssamples$V1
# bamsClusters = as.data.frame(bamsClusters)
write.table(x = sintBamsClusters, file = "../data/sint/sintBamsClusters", sep = "\t", row.names = FALSE, col.names = FALSE, quote = FALSE)
sintPcangsd = merge(sintPcangsd, aggregate(cbind(mean.x = PC1, mean.y = PC2)~sitedepth, sintPcangsd, mean), by="sitedepth")
sintPcangsd %>% dplyr::group_by(depth,cluster) %>% dplyr::summarize(n = n())
## `summarise()` has grouped output by 'depth'. You can override using the `.groups`
## argument.
## # A tibble: 6 × 3
## # Groups: depth [2]
## depth cluster n
## <fct> <fct> <int>
## 1 Shallow Si_1 6
## 2 Shallow Si_2 11
## 3 Shallow Admixed 41
## 4 Mesophotic Si_1 19
## 5 Mesophotic Si_2 12
## 6 Mesophotic Admixed 114
sintPcaPlot12SA = ggplot() +
geom_hline(yintercept = 0, color = "gray90", size = 0.25) +
geom_vline(xintercept = 0, color = "gray90", size = 0.25) +
geom_point(data = sintPcangsd, aes(x = PC1, y = PC2, fill = bank, shape = depth, color = bank), stroke = 0, size = 3, alpha = 0.5, show.legend = FALSE) +
geom_point(data = sintPcangsd, aes(x = mean.x, y = mean.y, fill = bank, shape = depth), color = "black", size = 3.75, alpha = 1, stroke = 0.25) +
scale_shape_manual(values = c(21, 23), name = "Depth Zone") +
scale_fill_manual(values = gomPal, name = "Site") +
scale_color_manual(values = gomPal, name = "Site") +
labs(x = paste0("PC 1 (", format(round(sintPcaVar[1], 1), nsmall = 1)," %)")) +
annotate(geom = "text", x = -0.24, y = -0.033, label = paste0("PC 2 (", format(round(sintPcaVar[2], 1), nsmall = 1), " %)"), size = 3.65, angle = 90) +
guides(shape = guide_legend(override.aes = list(size = 4, stroke = 0.25, alpha = NA), order = 2, ncol = 1), fill = guide_legend(override.aes = list(shape = 22, size = 4, fill = gomPal[c(1,2,3,4,5)], alpha = NA), order = 1, ncol = 1)) +
coord_cartesian(xlim = c(-.2,.25), clip = "off") +
theme_bw()
sintPcaPlot12S = sintPcaPlot12SA +
pcaTheme +
theme(legend.position = c(0.87, 0.76),
axis.title.y = element_blank())
sintPcaPlot12S
sintPcaPlot12LA = ggplot() +
geom_hline(yintercept = 0, color = "gray90", size = 0.5) +
geom_vline(xintercept = 0, color = "gray90", size = 0.5) +
geom_point(data = sintPcangsd, aes(x = PC1, y = PC2, fill = cluster, shape = depth), color = "black", size = 3, alpha = 1, show.legend = TRUE) +
scale_shape_manual(values = c(21, 23), name = "Depth Zone") +
scale_fill_manual(values = sintKColPal[c(1,2,7)], name = "Lineage") +
labs(x = paste0("PC 1 (", format(round(sintPcaVar[1], 1), nsmall = 1)," %)"), y = paste0("PC 2 (", format(round(sintPcaVar[2], 1), nsmall = 1), " %)")) +
guides(shape = "none", fill = guide_legend(override.aes = list(shape = 22, size = 4, fill = sintKColPal[c(1,2,7)], alpha = NA), order = 1, ncol = 1))+
coord_cartesian(xlim = c(-.2,.25), clip = "off") +
theme_bw()
sintPcaPlot12L = sintPcaPlot12LA +
pcaTheme +
theme(legend.position = c(0.9,0.86))
Put all plots together
sintPcaPlots = (sintPcaPlot12S | sintPcaPlot12L) +
plot_annotation(tag_levels = 'A') &
theme(plot.tag = element_text(size = 18),
panel.background = element_rect(fill = "white"),
legend.spacing = unit(-5, "pt"),
legend.key = element_blank(),
legend.background = element_blank())
sintPcaPlots
sintAdmix = sintPcangsd %>%dplyr::select(-PC1, -PC2, -PC3, -PC4, -cluster, -depthm, -mean.x, -mean.y)
sintAdmix$bank = factor(sintAdmix$bank)
sintAdmix = arrange(sintAdmix, bank, depth, -cluster1, -cluster2)
sintPopCounts = sintAdmix %>% group_by(bank, depth) %>% summarise(n = n())
## `summarise()` has grouped output by 'bank'. You can override using the `.groups`
## argument.
sintPopCounts
## # A tibble: 7 × 3
## # Groups: bank [5]
## bank depth n
## <fct> <fct> <int>
## 1 WFGB Shallow 28
## 2 WFGB Mesophotic 30
## 3 EFGB Shallow 30
## 4 EFGB Mesophotic 28
## 5 Bright Mesophotic 30
## 6 Geyer Mesophotic 30
## 7 McGrail Mesophotic 27
sintPopCountList = reshape2::melt(lapply(sintPopCounts$n,function(x){c(1:x)}))
sintAdmix$ord = sintPopCountList$value
sintAdmixMelt = melt(sintAdmix, id.vars=c("sample", "bank", "depth", "sitedepth", "ord"), variable.name="Ancestry", value.name="Fraction")
sintAdmixMelt$Ancestry = factor(sintAdmixMelt$Ancestry)
sintAdmixMelt$Ancestry = factor(sintAdmixMelt$Ancestry, levels = rev(levels(sintAdmixMelt$Ancestry)))
sintPopAnno = data.frame(x1 = c(0.5, 0.5, 0.5, 0.5, 0.5), x2 = c(30.5, 30.5, 30.5, 30.5, 30.5),
y1 = -0.1, y2 = -0.1, sample = NA, Ancestry = NA, depth = "Mesophotic",
ord = NA, Fraction = NA,
bank = c("WFGB", "EFGB", "Bright", "Geyer", "McGrail"))
sintPopAnno$bank = factor(sintPopAnno$bank)
sintPopAnno$bank = factor(sintPopAnno$bank, levels = levels(sintPopAnno$bank)[c(5, 2, 1, 3, 4)])
Make admixture plots
sintAdmixPlotA = ggplot(data = sintAdmixMelt, aes(x = ord, y = Fraction, fill = Ancestry, order = sample)) +
geom_segment(data = sintPopAnno, aes(x = x1, xend = x2, y = -.1, yend = -.1, color = bank), size = 7) +
geom_bar(stat = "identity", position = "fill", width = 1, colour = "grey25", size = 0.2) +
facet_grid(factor(depth) ~ bank, switch = "both", scales = "free_x") +
geom_text(data = (sintAdmixMelt %>% filter(depth == "Mesophotic", bank %in% c("WFGB", "EFGB", "Bright", "Geyer", "McGrail"), sample %in% c("SGM075", "SGM127", "SGM058", "SGM208", "SGM020"), Ancestry == "cluster1")), x = 15.5, y = -.1, aes(label = bank), size = 4, color = "black") +
scale_fill_manual(values = sintKColPal) +
scale_color_manual(values = gomPal) +
scale_x_discrete(expand = c(0.005, 0.005)) +
scale_y_continuous(expand = c(0.001, 0.001)) +
coord_cartesian(ylim = c(0.0, 1.0), clip = "off") +
theme_bw()
sintAdmixPlot = sintAdmixPlotA +
theme_bw()+
admixTheme
sintAdmixPlot
leveneTest(lm(depthm ~ cluster, data = sintPcangsd))
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 2 1.9456 0.1456
## 200
sintDepthAov = welch_anova_test(depthm ~ cluster, data = subset(sintPcangsd, subset = sintPcangsd$cluster!="Admixed"))
sintDF = sintDepthAov$statistic[[1]]
sintDepthPH = games_howell_test(depthm ~ cluster, data = sintPcangsd, conf.level = 0.95) %>% as.data.frame()
sintDepthComps = paste(sintDepthPH$group1,"-",sintDepthPH$group2, sep ="")
# sintComps = c("D1-D2", "D1-D3", "D1-D4", "D1-S1", "D1-S2", "D1-A",)
sintDepthPs = sintDepthPH$p.adj
siDepthMultComp = sintDepthPs < .05
names(siDepthMultComp) = sintDepthComps
siDepthLet = multcompLetters(siDepthMultComp)
siDepthLetters = siDepthLet$Letters %>% as.data.frame()
sintDepthLetters = data.frame(x = factor(rownames(siDepthLetters)), y = 5, grp = siDepthLetters$.)
sintLineageViolinA = ggplot(data = sintPcangsd, aes(x = cluster, y = depthm)) +
annotate(geom = "rect", xmin = -Inf, xmax = Inf, ymin = 30, ymax = Inf, fill = "black", alpha = 0.10, color = NA) +
geom_violin(aes(fill = cluster),adjust = 1, linewidth = 0, color = "black", alpha = 0.75, width = 0.9, trim = F, scale = "area") +
geom_beeswarm(aes(fill = cluster), shape = 21, size = 2, cex = 1.5, alpha = 0.5) +
geom_violin(adjust = 1, linewidth = 0.4, color = "black", alpha = 1, width = 0.9, trim = F, fill = NA, scale = "area") +
geom_boxplot(width = 0.1, color = "black", fill = "white", outlier.colour = NA, linewidth = 0.6, alpha = 0.5) +
geom_segment(data = ticks[c(1:13),], aes(x = x, xend = xend, y = y), color = "black") +
annotate(geom = "text", x = 2.85, y = 65, label = bquote(italic("F")~" = "~.(sintDF)*"; "~italic("p")~" = 0.131"), size = 3) +
annotate(geom = "text", x = sintDepthLetters$x, y = (sintDepthLetters$y - 4), label = sintDepthLetters$grp) +
scale_fill_discrete(type = sintKColPal[c(1,2,7)], name = "Lineage") +
scale_color_discrete(type = sintKColPal[c(1,2,7)], name = "Lineage") +
xlab("Lineage") +
ylab("Depth (m)") +
scale_y_reverse(breaks = seq(5, 90, 5)) +
coord_cartesian(xlim = c(0.75, 3.25),clip = "off") +
theme_bw()
sintLineageViolin = sintLineageViolinA + violinTheme
sintLineageViolin
sintData = read.csv("../data/nwgomMetaData.csv") %>% dplyr::filter(species == "S. intersepta") %>% dplyr::filter(!sample %in% c("SGM007", "SGM010", "SGM013", "SGM027.2", "SGM027.3", "SGM046.1", "SGM046.3", "SGM063", "SGM152.2", "SGM152.3", "SGM169", "SGM177", "SGM179", "SGM186.1", "SGM186.3", "SGM197.2", "SGM197.3", "SGM206.2", "SGM206.3"))
sintHetData = read.delim("../data/sint/sintHet.out", header = FALSE, sep = " ") %>% mutate(sample = sintData$sample) %>% rename("Het" = "V2") %>% left_join(sintPcangsd) %>% select(sample, Het, cluster)
## Joining with `by = join_by(sample)`
sintHet = sintData %>% left_join(sintHetData)
## Joining with `by = join_by(sample)`
sintHetAOV = welch_anova_test(sintHet, Het ~ cluster)
sintHetAOV$p
## [1] 0.0000301
sintHetPH = games_howell_test(sintHet, Het ~ cluster)
sintHetPH$p.adj
## [1] 0.0930000 0.0060000 0.0000574
sintComps = paste(sintHetPH$group1,"-",sintHetPH$group2, sep ="")
sintPs = sintHetPH$p.adj
siMultComp = sintPs < .05
names(siMultComp) = sintComps
siLet = multcompLetters(siMultComp)
siLetters = siLet$Letters %>% as.data.frame()
sintHetLetters = data.frame(x = factor(rownames(siLetters)), y = 0.0025, grp = siLetters$.)
sintFST = read.delim("../data/sint/sintKFst.out")
sintHetPlot = ggplot(data = sintHet, aes(x = cluster, y = Het)) +
geom_violin(aes(fill = cluster, group = cluster), adjust = 1, linewidth = 0.4, color = "black", alpha = 1, trim = TRUE, scale = "area", width = 1.2) +
geom_beeswarm(aes(fill = cluster), shape = 21, cex = 0.5) +
geom_boxplot(aes(fill = cluster, group = cluster), width = 0.2, color = "black", fill = "white", outlier.colour = NA, linewidth = 0.4, alpha = 0.5) + xlab("Lineage") +
geom_text(data = sintHetLetters, aes(x = x, y = y, label = grp), size = 4) +
annotate(geom = "text", x = 2.9, y = 0, label = bquote(italic("F")~" = "~.(sintHetAOV$statistic)*"; "~italic("p")~" < 0.001"), size = 3) +
geom_segment(data = sintFST, aes(y = 0.0022, x = pop1, xend = pop2)) +
annotate(geom = "text", x = 1.5, y = 0.0023, label = bquote(italic("F")["ST"]~" = "~.(round(sintFST$weightedFst, 3)))) +
scale_fill_discrete(type = sintKColPal[c(1,2,7)], name = "Lineage") +
xlab("Lineage") +
ylab("Heterozygosity") +
scale_y_continuous(breaks = seq(0, 0.0025, 0.0005)) +
coord_cartesian(expand = TRUE, xlim = c(0.85, 3.15), ylim = c(0, 0.0025)) +
theme_bw() +
violinTheme +
theme(panel.border = element_rect(linewidth = 1))
sintHetPlot
sintNpgList = list(read_tsv("../data/sint/Si_Deep.thetas.idx.pestPG") %>% mutate(lineage = "Si_1"),
read_tsv("../data/sint/Si_1.thetas.idx.pestPG") %>% mutate(lineage = "Si_2"))
## Rows: 28 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (2): #(indexStart,indexStop)(firstPos_withData,lastPos_withData)(WinStart,WinSt...
## dbl (12): WinCenter, tW, tP, tF, tH, tL, Tajima, fuf, fud, fayh, zeng, nSites
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 28 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (2): #(indexStart,indexStop)(firstPos_withData,lastPos_withData)(WinStart,WinSt...
## dbl (12): WinCenter, tW, tP, tF, tH, tL, Tajima, fuf, fud, fayh, zeng, nSites
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
sintPiAll = purrr::reduce(sintNpgList, rbind) %>%
dplyr::group_by(lineage) %>%
filter(tP != 0) %>%
mutate(tPps = tP/nSites) %>%
dplyr::summarize(tPps = mean(tPps))
sintPiAll$Ne = (sintPiAll$tPps)/(4*2e-8)
sintPiAll
## # A tibble: 2 × 3
## lineage tPps Ne
## <chr> <dbl> <dbl>
## 1 Si_1 0.00580 72531.
## 2 Si_2 0.00587 73332.
sintPiAll$lineage = factor(sintPiAll$lineage)
sintNuclDivPlot = ggplot(sintPiAll, aes(x = lineage, y = tPps, fill = lineage, group = lineage)) +
geom_bar(position = position_dodge2(preserve = "single"), stat = "identity", color = "black") +
scale_fill_manual(values = sintKColPal) +
geom_text(position = position_dodge2(preserve = "single", width = 0.9), aes(y = tPps-.0015, label= scales::comma(round(Ne,0)), hjust = 0, fontface = "bold"), angle = 90, color = "black", ) +
labs(x = "Lineage", y = bquote("Nucleotide diversity ("*pi*")")) +
theme_bw() +
violinTheme +
theme(axis.text.x = ggtext::element_markdown())
sintNuclDivPlot
sintData = read.csv("../data/nwgomMetaData.csv") %>% dplyr::filter(species == "S. intersepta") %>% dplyr::filter(!sample %in% c("SGM007", "SGM010", "SGM013", "SGM027.2", "SGM027.3", "SGM046.1", "SGM046.3", "SGM063", "SGM152.2", "SGM152.3", "SGM169", "SGM177", "SGM179", "SGM186.1", "SGM186.3", "SGM197.2", "SGM197.3", "SGM206.2", "SGM206.3")) %>% dplyr::select(sample, "site" = bank, "depth" = depthZone)
sintZoox = read.delim("../data/sint/sintZooxReads", header = FALSE, check.names = FALSE)
head(sintZoox)
## V1 V2
## 1 SGM001.trim.zoox.bt2.bam NA
## 2 chr1 49
## 3 chr2 75
## 4 chr3 77
## 5 chr4 107
## 6 chr5 9
# Reconstruct read mapping output into dataframe usable for analysis
sintZoox$V2[is.na(sintZoox$V2)] <- as.character(sintZoox$V1[is.na(sintZoox$V2)])
sintZoox$V1 = gsub(pattern = "MGM*.", "chr", sintZoox$V1)
sintZoox$V2 = gsub(".trim.*", "", sintZoox$V2)
sintZoox = sintZoox %>% filter(sintZoox$V1 != "*")
sintZooxLst = split(sintZoox$V2, as.integer(gl(length(sintZoox$V2), 20, length(sintZoox$V2))))
sintZooxMaps = NULL
for(i in sintZooxLst){
sintZooxMaps = rbind(sintZooxMaps, data.frame(t(i)))
}
# remove tech reps
sintZooxMaps = sintZooxMaps %>% dplyr::filter(!X1 %in% c("SGM007", "SGM010", "SGM013", "SGM027.2", "SGM027.3", "SGM046.1", "SGM046.3", "SGM063", "SGM152.2", "SGM152.3", "SGM169", "SGM177", "SGM179", "SGM186.1", "SGM186.3", "SGM197.2", "SGM197.3", "SGM206.2", "SGM206.3"))
colnames(sintZooxMaps) = c("sample",sintZoox$V1[c(2:20)])
# convert characters to numeric
str(sintZooxMaps)
## 'data.frame': 203 obs. of 20 variables:
## $ sample: chr "SGM001" "SGM002" "SGM003" "SGM004" ...
## $ chr1 : chr "49" "95" "217" "834" ...
## $ chr2 : chr "75" "98" "259" "1051" ...
## $ chr3 : chr "77" "107" "206" "1241" ...
## $ chr4 : chr "107" "173" "296" "1291" ...
## $ chr5 : chr "9" "19" "26" "40" ...
## $ chr6 : chr "47" "97" "266" "193" ...
## $ chr7 : chr "64" "137" "333" "239" ...
## $ chr8 : chr "76" "192" "373" "319" ...
## $ chr9 : chr "32" "114" "192" "153" ...
## $ chr10 : chr "3654" "7656" "9211" "14606" ...
## $ chr11 : chr "3936" "11339" "12614" "14468" ...
## $ chr12 : chr "4394" "12409" "14093" "16147" ...
## $ chr13 : chr "3882" "11175" "12653" "14511" ...
## $ chr14 : chr "3621" "10312" "11406" "13837" ...
## $ chr15 : chr "494" "1386" "1539" "1851" ...
## $ chr16 : chr "281" "115" "215" "936" ...
## $ chr17 : chr "93" "66" "176" "330" ...
## $ chr18 : chr "115" "61" "178" "400" ...
## $ chr19 : chr "39" "12" "33" "98" ...
for(i in c(2:20)){
sintZooxMaps[,i] = as.numeric(sintZooxMaps[,i])
}
str(sintZooxMaps)
## 'data.frame': 203 obs. of 20 variables:
## $ sample: chr "SGM001" "SGM002" "SGM003" "SGM004" ...
## $ chr1 : num 49 95 217 834 202 134 150 109 161 70 ...
## $ chr2 : num 75 98 259 1051 221 ...
## $ chr3 : num 77 107 206 1241 168 ...
## $ chr4 : num 107 173 296 1291 337 ...
## $ chr5 : num 9 19 26 40 29 13 18 10 8 17 ...
## $ chr6 : num 47 97 266 193 180 161 170 91 238 82 ...
## $ chr7 : num 64 137 333 239 247 180 172 163 392 104 ...
## $ chr8 : num 76 192 373 319 318 188 192 225 440 131 ...
## $ chr9 : num 32 114 192 153 131 79 84 105 202 100 ...
## $ chr10 : num 3654 7656 9211 14606 8695 ...
## $ chr11 : num 3936 11339 12614 14468 12436 ...
## $ chr12 : num 4394 12409 14093 16147 14062 ...
## $ chr13 : num 3882 11175 12653 14511 12625 ...
## $ chr14 : num 3621 10312 11406 13837 11832 ...
## $ chr15 : num 494 1386 1539 1851 1520 ...
## $ chr16 : num 281 115 215 936 64 ...
## $ chr17 : num 93 66 176 330 79 101 45 55 292 359 ...
## $ chr18 : num 115 61 178 400 52 100 39 39 234 460 ...
## $ chr19 : num 39 12 33 98 10 18 12 5 41 144 ...
# collapse fake chromosomes into representative genera
sintZooxMaps$Symbiodinium = rowSums(sintZooxMaps[2:6])
sintZooxMaps$Breviolum = rowSums(sintZooxMaps[7:10])
sintZooxMaps$Cladocopium = rowSums(sintZooxMaps[11:16])
sintZooxMaps$Durusdinium = rowSums(sintZooxMaps[17:20])
# keep genera totals and turn into proportions for barplot
sintZooxMaps = sintZooxMaps[,c(1, 21:24)]
sintZooxProp = sintZooxMaps
sintZooxProp$sum = apply(sintZooxProp[, c(2:length(sintZooxProp[1,]))], 1, function(x) {
sum(x, na.rm = T)
})
sintZooxProp = cbind(sintZooxProp$sample, (sintZooxProp[, c(2:(ncol(sintZooxProp)-1))]
/ sintZooxProp$sum))
colnames(sintZooxProp)[1] = "sample"
head(sintZooxProp)
## sample Symbiodinium Breviolum Cladocopium Durusdinium
## 1 SGM001 0.015062960 0.010406272 0.9494417 0.025089095
## 2 SGM002 0.008854813 0.009718698 0.9768551 0.004571387
## 3 SGM003 0.015617708 0.018106586 0.9569113 0.009364403
## 4 SGM004 0.053994791 0.010951602 0.9136834 0.021370162
## 5 SGM005 0.015140489 0.013859005 0.9677572 0.003243260
## 6 SGM006 0.012986729 0.013292814 0.9644067 0.009313715
# Check that all samples total to 1
apply(sintZooxProp[, c(2:(ncol(sintZooxProp)))], 1, function(x) {
sum(x, na.rm = T)
})
## [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [42] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [83] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [124] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [165] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
# add sample metadata to proportions
sintSym = sintData %>% left_join(sintZooxProp)
## Joining with `by = join_by(sample)`
sintAdmixOrd = sintAdmix %>% dplyr::select(sample, ord)
sintPcangsdITS =sintPcangsd %>% dplyr::select(sample, cluster)
sintSym = sintSym %>% left_join(sintAdmixOrd) %>% relocate(ord,.after = depth) %>% left_join(sintPcangsdITS) %>% relocate(cluster, .after = depth)
## Joining with `by = join_by(sample)`
## Joining with `by = join_by(sample)`
sintZooxPermData = sintData %>% left_join(sintZooxMaps) %>% left_join(sintPcangsdITS) %>% relocate(cluster, .after = depth)
## Joining with `by = join_by(sample)`
## Joining with `by = join_by(sample)`
sintZooxPermData$depth = factor(sintZooxPermData$depth)
sintZooxPermData$depth = factor(sintZooxPermData$depth, levels = levels(sintZooxPermData$depth)[c(2, 1)])
sintZooxPermData$site = factor(sintZooxPermData$site)
sintZooxPermData$site = factor(sintZooxPermData$site, levels = levels(sintZooxPermData$site)[c(5, 2, 1, 3, 4)])
head(sintZooxPermData)
## sample site depth cluster Symbiodinium Breviolum Cladocopium Durusdinium
## 1 SGM001 WFGB Mesophotic Admixed 317 219 19981 528
## 2 SGM002 WFGB Mesophotic Admixed 492 540 54277 254
## 3 SGM003 McGrail Mesophotic Si_2 1004 1164 61516 602
## 4 SGM004 McGrail Mesophotic Si_2 4457 904 75420 1764
## 5 SGM005 McGrail Mesophotic Admixed 957 876 61170 205
## 6 SGM006 McGrail Mesophotic Admixed 594 608 44111 426
set.seed(694)
sintSymPerm = adonis2(decostand(sintZooxPermData[ c(5:((ncol(sintZooxPermData))))], "normalize") ~ depth * site, data = sintZooxPermData, permutations = 9999, method = "bray", sqrt.dist = TRUE, by = "terms")
sintSymPerm
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = decostand(sintZooxPermData[c(5:((ncol(sintZooxPermData))))], "normalize") ~ depth * site, data = sintZooxPermData, permutations = 9999, method = "bray", sqrt.dist = TRUE, by = "terms")
## Df SumOfSqs R2 F Pr(>F)
## depth 1 0.1346 0.02423 5.2797 0.0001 ***
## site 4 0.3855 0.06940 3.7805 0.0001 ***
## depth:site 1 0.0385 0.00693 1.5098 0.1328
## Residual 196 4.9970 0.89945
## Total 202 5.5556 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
sintSymPermPW = pairwise.adonis2(decostand(sintZooxPermData[ c(5:((ncol(sintZooxPermData))))], "normalize") ~ site, data = sintZooxPermData, nperm = 9999, method = "bray", sqrt.dist = TRUE, by = "terms")
sintSymPermPW
## $parent_call
## [1] "decostand(sintZooxPermData[c(5:((ncol(sintZooxPermData))))], \"normalize\") ~ site , strata = Null , permutations 9999"
##
## $WFGB_vs_McGrail
## Df SumOfSqs R2 F Pr(>F)
## site 1 0.01533 0.01167 0.9796 0.404
## Residual 83 1.29858 0.98833
## Total 84 1.31390 1.00000
##
## $WFGB_vs_Bright
## Df SumOfSqs R2 F Pr(>F)
## site 1 0.05238 0.03544 3.16 0.002 **
## Residual 86 1.42548 0.96456
## Total 87 1.47785 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $WFGB_vs_EFGB
## Df SumOfSqs R2 F Pr(>F)
## site 1 0.0345 0.00968 1.1147 0.317
## Residual 114 3.5276 0.99032
## Total 115 3.5621 1.00000
##
## $WFGB_vs_Geyer
## Df SumOfSqs R2 F Pr(>F)
## site 1 0.28654 0.12671 12.478 0.001 ***
## Residual 86 1.97490 0.87329
## Total 87 2.26144 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $McGrail_vs_Bright
## Df SumOfSqs R2 F Pr(>F)
## site 1 0.0156 0.02253 1.2676 0.256
## Residual 55 0.6768 0.97747
## Total 56 0.6924 1.00000
##
## $McGrail_vs_EFGB
## Df SumOfSqs R2 F Pr(>F)
## site 1 0.02821 0.01005 0.8425 0.506
## Residual 83 2.77898 0.98995
## Total 84 2.80719 1.00000
##
## $McGrail_vs_Geyer
## Df SumOfSqs R2 F Pr(>F)
## site 1 0.19074 0.13461 8.5552 0.001 ***
## Residual 55 1.22623 0.86539
## Total 56 1.41697 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $Bright_vs_EFGB
## Df SumOfSqs R2 F Pr(>F)
## site 1 0.04487 0.01521 1.3279 0.176
## Residual 86 2.90588 0.98479
## Total 87 2.95075 1.00000
##
## $Bright_vs_Geyer
## Df SumOfSqs R2 F Pr(>F)
## site 1 0.14906 0.09923 6.3894 0.001 ***
## Residual 58 1.35313 0.90077
## Total 59 1.50219 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $EFGB_vs_Geyer
## Df SumOfSqs R2 F Pr(>F)
## site 1 0.2159 0.05882 5.3747 0.001 ***
## Residual 86 3.4553 0.94118
## Total 87 3.6712 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## attr(,"class")
## [1] "pwadstrata" "list"
S. intersepta lineage vs Symbiodiniaceae B-C distance
sintZooxDist = sintZooxPermData[ c(5:ncol(sintZooxPermData))] %>% decostand("normalize") %>% vegdist(method = "bray")
sintZooxPCoA = cmdscale(sintZooxDist, eig = TRUE, x.ret = TRUE)
sintAdmixDist = sintAdmixK2[c(5:ncol(sintAdmixK2))] %>% vegdist(method = "euclidean")
sintAdmixPCoA = cmdscale(sintAdmixDist, eig = TRUE, x.ret = TRUE)
set.seed(981)
sintProcrustes = protest(X = sintAdmixPCoA, Y = sintZooxPCoA , permutations = 9999)
sintProcrustes
##
## Call:
## protest(X = sintAdmixPCoA, Y = sintZooxPCoA, permutations = 9999)
##
## Procrustes Sum of Squares (m12 squared): 0.9985
## Correlation in a symmetric Procrustes rotation: 0.03859
## Significance: 0.7103
##
## Permutation: free
## Number of permutations: 9999
sintSym$depth = factor(sintSym$depth)
sintSym$depth = factor(sintSym$depth, levels = levels(sintSym$depth)[c(2, 1)])
sintSym$site = factor(sintSym$site)
sintSym$site = factor(sintSym$site, levels = levels(sintSym$site)[c(5, 2, 1, 3, 4)])
#turn into melted dataframe with otustack() and remove "summ" rows
sintGssSym = otuStack(sintSym, count.columns = c(6:length(sintSym[1, ])),
condition.columns = c(1:5)) %>% filter(otu != "summ") %>% droplevels()
#check that levels are correct/ordered
levels(sintGssSym$otu)
## [1] "Symbiodinium" "Breviolum" "Cladocopium" "Durusdinium"
levels(sintGssSym$depth)
## [1] "Shallow" "Mesophotic"
levels(sintGssSym$site)
## [1] "WFGB" "EFGB" "Bright" "Geyer" "McGrail"
Creating Symbiodiniaceae genera relative proportion barplot
sintZooxAnno = data.frame(x1 = c(0.5, 0.5, 0.5, 0.5, 0.5), x2 = c(30.5, 30.5, 30.5, 30.5, 30.5),
y1 = -0.22, y2 = -0.22, site = c("WFGB", "EFGB", "Bright", "Geyer", "McGrail"))
sintZooxAnno$site = factor(sintZooxAnno$site)
sintZooxAnno$site = factor(sintZooxAnno$site, levels = levels(sintZooxAnno$site)[c(5, 2, 1, 3, 4)])
sintGssSymPlot = sintGssSym %>% left_join(sintZooxAnno, by = "site") %>% left_join(sintPcangsdITS)
## Joining with `by = join_by(sample, cluster)`
sintGssSymPlot$ord = as.numeric(sintGssSymPlot$ord)
sintZooxPlotA = ggplot(data = sintGssSymPlot, aes(x = ord, y = count, fill = otu, order = ord)) +
geom_point(aes(x=1, y=0.5, fill = otu), shape = 22, size = 0) +
geom_bar(stat = "identity", position = "stack", colour = "grey25", width = 1, size = 0.2, show.legend = FALSE) +
xlab("Population") +
scale_fill_manual(values = colPalZoox, name = "Symbiodiniaceae \ngenus") +
geom_segment(data = (sintGssSymPlot %>% filter(depth == "Mesophotic")), aes(x = x1, xend = x2, y = y1, yend = y2, color = site), size = 7) +
scale_color_manual(values = gomPal, guide = "none") +
ggnewscale::new_scale_color() +
geom_segment(data = sintGssSymPlot, aes(x = ord-0.5, xend = ord+0.5, color = cluster), y = -.07, yend = -.07, linewidth = 4) +
scale_color_manual(values = sintKColPal[c(1,2,7)], name = "Lineage") +
coord_cartesian(ylim = c(0, 1), xlim = c(0.5, 30.5), clip = "off") +
scale_x_discrete(expand = c(0.005, 0.005)) +
scale_y_continuous(expand = c(0.001, 0.001)) +
facet_grid(factor(depth) ~ site, drop = TRUE, scales = "free", switch = "both", space = "free") +
geom_text(data = subset(sintGssSymPlot, subset = otu == "Symbiodinium") %>% filter(sample %in% c("SGM075", "SGM127", "SGM058", "SGM208", "SGM020")), x = 15.5, y = -.205, aes(label = site), size = 4.4, color = "#000000") +
guides(fill = guide_legend(override.aes = list(size = 4), ncol = 1, order = 1), color = guide_legend(override.aes = list(size = 0.1), ncol = 1)) +
theme_bw()
sintZooxPlot = sintZooxPlotA + zooxTheme
sintZooxPlot
sintPlot1 = sintStructure + inset_element(sintImg, -0.1, 0.7, 0.3, 1, align_to = "full", ignore_tag = TRUE)
sintPlot2 = (sintPcaPlots) + plot_layout(guides = "collect")
sintPlot3 = (sintLineageViolin+ theme(axis.text.y = element_text(margin = ggplot2::margin(r = -15, unit = "pt")))) + sintHetPlot + sintNuclDivPlot
sintPlot4 = sintAdmixPlot + sintZooxPlot
sintPlots = sintPlot1 + sintPlot2 + sintPlot3 + sintPlot4 +
plot_layout(heights = c(2,1,1,1)) +
plot_annotation(tag_levels = 'A') &
theme(plot.tag = element_text(size = 18),
legend.spacing = unit(-5, "pt"),
legend.key = element_blank(),
legend.background = element_blank())
ggsave("../figures/figure3.png", plot = sintPlots, height = 14, width = 12, units = "in", dpi = 300)
ggsave("../figures/figure3.svg", plot = sintPlots, height = 14, width = 12, units = "in", dpi = 300)
ofavBams = read.csv("../data/nwgomMetaData.csv") %>% dplyr::filter(species == "O. faveolata") %>% dplyr::filter(!sample %in% c("OGM003", "OGM043", "OGM075", "OGM076", "OGM121", "OGM126", "OGM108.2", "OGM108.3", "OGM024.3", "OGM024.1", "OGM017", "OGM066.2", "OGM066.1", "OGM071", "OGM072", "OGM016", "OGM009", "OGM077", "OGM086", "OGM013", "OGM115", "OGM118", "OGM012", "OGM124", "OGM005"))# list of bams files and their populations (tech reps removed)
ofavBams$sample <- gsub("\\.[1-3]*$", "", ofavBams$sample)
ofavBams$bank = factor(ofavBams$bank)
ofavBams$bank = factor(ofavBams$bank, levels = levels(mcavBams$bank))
ofavBams$depthZone = factor(ofavBams$depthZone)
ofavBams$depthZone = factor(ofavBams$depthZone, levels = levels(ofavBams$depthZone)[c(2, 1)])
ofavMa = as.matrix(read.table("../data/ofav/ofavNoClones.ibsMat")) # reads in IBS matrix produced by ANGSD
dimnames(ofavMa) = list(ofavBams[,3],ofavBams[,3])
## admixture K = 2
#-------------------------------------
ofavK2ad = read.table("../data/ofav/admix/ofavK2.output") %>% dplyr::select(V6, V7)
ofavK2ad %>% summarise(sum(V6),sum(V7))
## sum(V6) sum(V7)
## 1 67.5506 48.4494
ofavAdmixK2 = ofavBams %>%
dplyr::select(sample, bank, "depth" = depthZone, "depthm" = depthM) %>%
cbind(ofavK2ad) %>%
dplyr::rename("Of1" = "V6", "Of2" = "V7")
ofavAdmixK2_melt = melt(ofavAdmixK2, id = c("sample", "bank", "depth", "depthm"))
## admixture K = 3
#-------------------------------------
ofavK3ad = read.table("../data/ofav/admix/ofavK3.output") %>% dplyr::select(V6, V7, V8)
ofavK3ad %>% summarise(sum(V6),sum(V7), sum(V8))
## sum(V6) sum(V7) sum(V8)
## 1 59.9384 47.5495 8.512
ofavAdmixK3 = ofavBams %>%
dplyr::select(sample, bank, "depth" = depthZone, "depthm" = depthM) %>%
cbind(ofavK3ad) %>%
dplyr::rename("Of1" = "V6", "Of2" = "V7", "Of3" = "V8") %>%
dplyr::select(order(colnames(.)))
ofavAdmixK3_melt = melt(ofavAdmixK3, id = c("sample", "bank", "depth", "depthm"))
## ngsAdmix K = 4
#-------------------------------------
ofavK4ad = read.table("../data/ofav/admix/ofavK4.output") %>% dplyr::select(V6, V7, V8, V9)
ofavK4ad %>% summarise(sum(V6),sum(V7), sum(V8), sum(V9))
## sum(V6) sum(V7) sum(V8) sum(V9)
## 1 59.5271 28.3381 8.5588 19.5758
ofavAdmixK4 = ofavBams %>%
dplyr::select(sample, bank, "depth" = depthZone, "depthm" = depthM) %>%
cbind(ofavK4ad) %>%
dplyr::rename("Of1" = "V6", "Of2" = "V7", "Of3" = "V8", "Of4" = "V9") %>%
dplyr::select(order(colnames(.)))
ofavAdmixK4_melt = melt(ofavAdmixK4, id = c("sample", "bank", "depth", "depthm"))
## admixture K = 5
#-------------------------------------
ofavK5ad = read.table("../data/ofav/admix/ofavK5.output") %>% dplyr::select(V6, V7, V8, V9, V10)
ofavK5ad %>% summarise(sum(V6),sum(V7), sum(V8), sum(V9), sum(V10))
## sum(V6) sum(V7) sum(V8) sum(V9) sum(V10)
## 1 32.7518 37.0866 8.1358 10.7534 27.2718
ofavAdmixK5 = ofavBams %>%
dplyr::select(sample, bank, "depth" = depthZone, "depthm" = depthM) %>%
cbind(ofavK5ad) %>%
dplyr::rename("Of1" = "V6", "Of2" = "V7", "Of3" = "V8", "Of4" = "V9", "Of5" = "V10") #%>%
ofavAdmixK5_melt = melt(ofavAdmixK5, id = c("sample", "bank", "depth", "depthm"))
## admixture K = 6
#-------------------------------------
ofavK6ad = read.table("../data/ofav/admix/ofavK6.output") %>% dplyr::select(V6, V7, V8, V9, V10,V11)
ofavK6ad %>% summarise(sum(V6),sum(V7), sum(V8), sum(V9), sum(V10), sum(V11))
## sum(V6) sum(V7) sum(V8) sum(V9) sum(V10) sum(V11)
## 1 32.3898 28.4808 8.0751 16.5413 22.871 7.6421
ofavAdmixK6 = ofavBams %>%
dplyr::select(sample, bank, "depth" = depthZone, "depthm" = depthM) %>%
cbind(ofavK6ad) %>%
dplyr::rename("Of1" = "V6", "Of2" = "V7", "Of3" = "V8", "Of4" = "V9", "Of5" = "V10", "Of6" = "V11") #%>%
ofavAdmixK6_melt = melt(ofavAdmixK6, id = c("sample", "bank", "depth", "depthm"))
## construct figure
#-------------------------------------
# ggtree(tr) + geom_nodelab(aes(label = node), hjust = -0.5)
{
ofavTr = hclust(dist(ofavMa),"ave")
ofavP1 = ggtree(ofavTr, layout = "rectangular", size = 0.35)
ofavP2 = facet_plot(ofavP1, panel = "location", data=ofavAdmixK2, geom = geom_tile, mapping = aes(x = 1, fill = bank), color = "grey25", size = 0.1, show.legend = TRUE) +
scale_fill_manual("Site", values = gomPal, guide = guide_legend(order = 1), drop = FALSE) +
new_scale_fill()
ofavP3 = facet_plot(ofavP2, panel = "depth zone", data=ofavAdmixK2, geom = geom_tile, mapping = aes(x = 1, fill = depth), color = "grey25", size = 0.1) +
scale_fill_manual("Depth zone", values = c("#A7E1BCFF", "#414083FF"), guide = guide_legend(order = 2)) +
new_scale_fill()
ofavP4 = facet_plot(ofavP3, panel = "depth", data=ofavAdmixK2, geom = geom_tile, mapping = aes(x = 1, fill = depthm), color = "grey25", size = 0.1) +
scale_fill_viridis_c("Depth (m)", option = "mako", trans = "reverse", limits = c(60, 10)) +
new_scale_fill()
ofavP5 = facet_plot(ofavP4, panel="K = 2", data=ofavAdmixK2_melt, geom=geom_barh, width = 1.0, mapping = aes(x = value, fill = variable),
stat='identity', color = "grey25", size = 0.1) +
scale_fill_manual("Lineage", values = ofavKColPal[1:6], guide = "none")
ofavP6 = facet_plot( ofavP5, panel="K = 3", data=ofavAdmixK3_melt, geom=geom_barh, width = 1.0, mapping = aes(x = value, fill = variable),
stat='identity', color = "grey25", size = 0.1, show.legend = FALSE)
ofavP7 = facet_plot(ofavP6, panel="K = 4", data=ofavAdmixK4_melt, geom=geom_barh, width = 1.0, mapping = aes(x = value, fill = variable),
stat='identity', color = "grey25", size = 0.1, show.legend = FALSE)
ofavP8 = facet_plot(ofavP7, panel="K = 5", data=ofavAdmixK5_melt, geom=geom_barh, width = 1.0, mapping = aes(x = value, fill = variable),
stat='identity', color = "grey25", size = 0.1, show.legend = FALSE)
ofavP9 = facet_plot(ofavP8, panel="K = 6", data=ofavAdmixK6_melt, geom=geom_barh, width = 1.0, mapping = aes(x = value, fill = variable),
stat='identity', color = "grey25", size = 0.1, show.legend = FALSE) +
theme_tree(strip.text = element_blank(),
panel.spacing = unit(.1, "line"))
}
ofavImg = image_read("../figures/icons/ofav.png") %>% image_ggplot()
ofavStructure = facet_widths(ofavP9, widths = c(0.5, 0.025, 0.025, 0.025, 0.1, 0.1, 0.2, 0.1, 0.1)) #+ inset_element(ofavImg, 0.0, 0.9, 0.1, 1.0, align_to = "full", ignore_tag = TRUE)
ofavStructure
ofavData = read.csv("../data/nwgomMetaData.csv") %>% dplyr::filter(species == "O. faveolata") %>% dplyr::filter(!sample %in% c("OGM003", "OGM043", "OGM075", "OGM076", "OGM121", "OGM126", "OGM108.2", "OGM108.3", "OGM024.3", "OGM024.1", "OGM017", "OGM066.2", "OGM066.1", "OGM071", "OGM072", "OGM016", "OGM009", "OGM077", "OGM086", "OGM013", "OGM115", "OGM118", "OGM012", "OGM124", "OGM005"))
ofavData$depthZone = factor(ofavData$depthZone)
ofavData$depthZone = factor(ofavData$depthZone, levels(ofavData$depthZone)[c(2,1)])
ofavData$bank = factor(ofavData$bank)
ofavData$bank = factor(ofavData$bank,levels(ofavData$bank)[c(5, 2, 1, 4, 3)])
ofavPcadmix = read.table("../data/ofav/admix/ofavK3.output") %>%dplyr::select(V6, V7, V8)
ofavPcadmix %>% summarise(sum(V6),sum(V7), sum(V8))
## sum(V6) sum(V7) sum(V8)
## 1 59.9384 47.5495 8.512
ofavPcadmix = ofavData %>% cbind(ofavPcadmix) %>% rename("cluster1" = "V6", "cluster2" = "V7", "cluster3" = "V8") %>%dplyr::select(order(colnames(.)))
ofavPcadmixClust = ofavPcadmix %>% mutate(cluster = ifelse(cluster1 < 0.75 & cluster2 < 0.75 & cluster3 < 0.75, "NA", ifelse(cluster1 >=0.75, 1, ifelse(cluster2 >= 0.75, 2, ifelse(cluster3 >= 0.75, 3, 0)))))
ofavPcadmix = ofavPcadmix %>% mutate(ofavPcadmixClust)
ofavPcadmix$cluster = as.factor(ofavPcadmix$cluster)
levels(ofavPcadmix$cluster) = c("Ofav1", "Ofav2", "Ofav3", "Admixed")
ofavSiteLineages = ofavPcadmix %>% dplyr::select(depthZone, cluster) %>%
group_by(depthZone) %>% count(cluster) %>% mutate(Freq = n/sum(n)) %>% apply(2, function(x) gsub("\\s+", "", x)) %>% as.data.frame()
ofavCov = read.table("../data/ofav/ofavPcangsd.cov") %>% as.matrix()
ofavPcAdmix = read.table("../data/ofav/admix/ofavK3.output") %>%dplyr::select(V6, V7, V8)
ofavPcAdmix %>% summarise(sum(V6),sum(V7), sum(V8))
## sum(V6) sum(V7) sum(V8)
## 1 59.9384 47.5495 8.512
ofavPcAdmix = ofavPcAdmix %>% rename("cluster1" = "V6", "cluster2" = "V8", "cluster3" = "V7") %>% dplyr::select(order(colnames(.)))
ofavPcaEig = eigen(ofavCov)
ofavPcaVar = ofavPcaEig$values/sum(ofavPcaEig$values)*100
head(ofavPcaVar)
## [1] 17.9764042 2.1801653 1.0518074 0.9772828 0.9348404 0.9206296
ofavPcangsd = read.csv("../data/nwgomMetaData.csv") %>% dplyr::filter(species == "O. faveolata") %>% dplyr::filter(!sample %in% c("OGM003", "OGM043", "OGM075", "OGM076", "OGM121", "OGM126", "OGM108.2", "OGM108.3", "OGM024.3", "OGM024.1", "OGM017", "OGM066.2", "OGM066.1", "OGM071", "OGM072", "OGM016", "OGM009", "OGM077", "OGM086", "OGM013", "OGM115", "OGM118", "OGM012", "OGM124", "OGM005")) %>% dplyr::select("sample" = sample, "bank" = bank, "depth" = depthZone, "depthm" = depthM)
ofavPcangsd$sitedepth = as.factor(paste(ofavPcangsd$bank, ofavPcangsd$depth, sep = " "))
ofavPcangsd$sitedepth = factor(ofavPcangsd$sitedepth, levels(ofavPcangsd$sitedepth)[c(4, 3, 2, 1)])
ofavPcangsd$bank = factor(ofavPcangsd$bank)
ofavPcangsd$bank = factor(ofavPcangsd$bank, levels(ofavPcangsd$bank)[c(5, 2, 1, 4, 3)])
ofavPcangsd$depth = factor(ofavPcangsd$depth)
ofavPcangsd$depth = factor(ofavPcangsd$depth, levels(ofavPcangsd$depth)[c(2, 1)])
ofavPcangsd$PC1 = ofavPcaEig$vectors[,1]
ofavPcangsd$PC2 = ofavPcaEig$vectors[,2]
ofavPcangsd$PC3 = ofavPcaEig$vectors[,3]
ofavPcangsd$PC4 = ofavPcaEig$vectors[,4]
ofavPcangsdClust = ofavPcAdmix %>% mutate(cluster = ifelse(cluster1 < 0.75 & cluster2 < 0.75 & cluster3 < 0.75, "NA", ifelse(cluster1 >=0.75, 1, ifelse(cluster2 >= 0.75, 2, ifelse(cluster3 >= 0.75, 3, 0)))))
# pcangsdClust$clusterX = as.vector(d_clust$classification)
ofavPcangsd = ofavPcangsd %>% mutate(ofavPcangsdClust)
ofavPcangsd$cluster = as.factor(ofavPcangsd$cluster)
levels(ofavPcangsd$cluster) = c("Of_Deep1", "Of_Deep2", "Of_Shal", "Admixed")
bamsClusters = ofavPcangsd %>% dplyr::select(sample, cluster) %>% dplyr::arrange(sample)
bamssamples = read.delim("../data/ofav/bamsNoClones", header = FALSE)
bamsClusters$sample = bamssamples$V1
# bamsClusters = as.data.frame(bamsClusters)
write.table(x = bamsClusters, file = "../data/ofavBamsClusters", sep = "\t", row.names = FALSE, col.names = FALSE, quote = FALSE)
ofavPcangsd = merge(ofavPcangsd, aggregate(cbind(mean.x = PC1, mean.y = PC2)~sitedepth, ofavPcangsd, mean), by="sitedepth")
ofavPcangsd %>% dplyr::group_by(depth,cluster) %>% dplyr::summarize(n = n())
## `summarise()` has grouped output by 'depth'. You can override using the `.groups`
## argument.
## # A tibble: 5 × 3
## # Groups: depth [2]
## depth cluster n
## <fct> <fct> <int>
## 1 Shallow Of_Deep1 10
## 2 Shallow Of_Shal 47
## 3 Mesophotic Of_Deep1 51
## 4 Mesophotic Of_Deep2 7
## 5 Mesophotic Of_Shal 1
ofavPcaPlot12SA = ggplot() +
geom_hline(yintercept = 0, color = "gray90", size = 0.25) +
geom_vline(xintercept = 0, color = "gray90", size = 0.25) +
geom_point(data = ofavPcangsd, aes(x = PC1, y = PC2, fill = bank, shape = depth, color = bank), stroke = 0, size = 3, alpha = 0.5, show.legend = FALSE) +
geom_point(data = ofavPcangsd, aes(x = mean.x, y = mean.y, fill = bank, shape = depth), color = "black", size = 3.75, alpha = 1, stroke = 0.25) +
scale_shape_manual(values = c(21, 23), name = "Depth Zone") +
scale_fill_manual(values = gomPal, name = "Site") +
scale_color_manual(values = gomPal, name = "Site") +
labs(x = paste0("PC 1 (", format(round(ofavPcaVar[1], 1), nsmall = 1)," %)")) +
annotate(geom = "text", x = -0.11, y = 0.15, label = paste0("PC 2 (", format(round(ofavPcaVar[2], 1), nsmall = 1), " %)"), size = 3.65, angle = 90) +
guides(shape = guide_legend(override.aes = list(size = 4, stroke = 0.25, alpha = NA), order = 2, ncol = 1), fill = guide_legend(override.aes = list(shape = 22, size = 4, fill = gomPal[c(1,2)], alpha = NA), order = 1, ncol = 1)) +
coord_cartesian(xlim = c(-.09,.12), clip = "off") +
theme_bw()
ofavPcaPlot12S = ofavPcaPlot12SA +
pcaTheme +
theme(legend.position = c(0.87, 0.82),
axis.title.y = element_blank())
ofavPcaPlot12S
ofavPcaPlot12LA = ggplot() +
geom_hline(yintercept = 0, color = "gray90", size = 0.5) +
geom_vline(xintercept = 0, color = "gray90", size = 0.5) +
geom_point(data = ofavPcangsd, aes(x = PC1, y = PC2, fill = cluster, shape = depth), color = "black", size = 3, alpha = 1, show.legend = TRUE) +
scale_shape_manual(values = c(21, 23), name = "Depth Zone") +
scale_fill_manual(values = ofavKColPal[c(1,3,2)], name = "Lineage") +
labs(x = paste0("PC 1 (", format(round(ofavPcaVar[1], 1), nsmall = 1)," %)"), y = paste0("PC 2 (", format(round(ofavPcaVar[2], 1), nsmall = 1), " %)")) +
guides(shape = "none", fill = guide_legend(override.aes = list(shape = 22, size = 4, alpha = NA, fill = ofavKColPal[c(1,3,2)]), order = 1, ncol = 1))+
coord_cartesian(xlim = c(-.09,.12), clip = "off") +
theme_bw()
ofavPcaPlot12L = ofavPcaPlot12LA +
pcaTheme +
theme(legend.position = c(0.9,0.86))
Put all plots together
ofavPcaPlots = (ofavPcaPlot12S + ofavPcaPlot12L) +
plot_annotation(tag_levels = 'A') &
theme(plot.tag = element_text(size = 18),
panel.background = element_rect(fill = "white"),
legend.spacing = unit(-5, "pt"),
legend.key = element_blank(),
legend.background = element_blank())
ofavPcaPlots
Prepare admixture outputs for plotting
ofavAdmix = ofavPcangsd %>%dplyr::select(-PC1, -PC2, -PC3, -PC4, -cluster, -depthm, -mean.x, -mean.y)
ofavAdmix$bank = factor(ofavAdmix$bank)
ofavAdmix = arrange(ofavAdmix, bank, depth, -cluster1, -cluster2)
ofavPopCounts = ofavAdmix %>% group_by(bank, depth) %>% summarise(n = n())
## `summarise()` has grouped output by 'bank'. You can override using the `.groups`
## argument.
ofavPopCounts
## # A tibble: 4 × 3
## # Groups: bank [2]
## bank depth n
## <fct> <fct> <int>
## 1 WFGB Shallow 28
## 2 WFGB Mesophotic 33
## 3 EFGB Shallow 29
## 4 EFGB Mesophotic 26
ofavPopCountList = reshape2::melt(lapply(ofavPopCounts$n,function(x){c(1:x)}))
ofavAdmix$ord = ofavPopCountList$value
ofavAdmixMelt = melt(ofavAdmix, id.vars=c("sample", "bank", "depth", "sitedepth", "ord"), variable.name="Ancestry", value.name="Fraction")
ofavAdmixMelt$Ancestry = factor(ofavAdmixMelt$Ancestry)
ofavAdmixMelt$Ancestry = factor(ofavAdmixMelt$Ancestry, levels = rev(levels(ofavAdmixMelt$Ancestry)))
ofavPopAnno = data.frame(x1 = c(0.5, 0.5), x2 = c(33.5, 33.5),
y1 = -0.1, y2 = -0.1, sample = NA, Ancestry = NA, depth = "Mesophotic",
ord = NA, Fraction = NA,
bank = c("WFGB", "EFGB"))
ofavPopAnno$bank = factor(ofavPopAnno$bank)
ofavPopAnno$bank = factor(ofavPopAnno$bank, levels = levels(ofavPopAnno$bank)[c(2, 1)])
Make admixture plots
ofavAdmixPlotA = ggplot(data = ofavAdmixMelt, aes(x = ord, y = Fraction, fill = Ancestry, order = sample)) +
geom_segment(data = ofavPopAnno, aes(x = x1, xend = x2, y = -.1, yend = -.1, color = bank), size = 7) +
geom_bar(stat = "identity", position = "fill", width = 1, colour = "grey25", size = 0.2) +
facet_grid(factor(depth) ~ bank, switch = "both", scales = "free_x") +
geom_text(data = (ofavAdmixMelt %>% filter(depth == "Mesophotic", bank %in% c("WFGB", "EFGB"), sample %in% c(
"OGM001", "OGM073"), Ancestry == "cluster1")), x = 15.5, y = -.1, aes(label = bank), size = 4, color = "black") +
scale_fill_manual(values = ofavKColPal[c(1,3,2)]) +
scale_color_manual(values = gomPal) +
scale_x_discrete(expand = c(0.005, 0.005)) +
scale_y_continuous(expand = c(0.001, 0.001)) +
coord_cartesian(ylim = c(0.0, 1.0), clip = "off") +
theme_bw()
ofavAdmixPlot = ofavAdmixPlotA + admixTheme
ofavAdmixPlot
leveneTest(lm(depthm ~ cluster, data = ofavPcangsd))
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 2 8.5905 0.0003365 ***
## 113
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ofavDepthAov = welch_anova_test(depthm ~ cluster, data = subset(ofavPcangsd, subset = ofavPcangsd$cluster!="Admixed"))
ofavDepthAov
## # A tibble: 1 × 7
## .y. n statistic DFn DFd p method
## * <chr> <int> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 depthm 116 235. 2 18.8 5.15e-14 Welch ANOVA
ofavDF = ofavDepthAov$statistic[[1]]
ofavDepthPH = games_howell_test(depthm ~ cluster, data = subset(ofavPcangsd, subset = ofavPcangsd$cluster!="Admixed"), conf.level = 0.95) %>% as.data.frame()
ofavDepthComps = paste(ofavDepthPH$group1,"-",ofavDepthPH$group2, sep ="")
# ofavComps = c("D1-D2", "D1-D3", "D1-D4", "D1-S1", "D1-S2", "D1-A",)
ofavDepthPs = ofavDepthPH$p.adj
ofDepthMultComp = ofavDepthPs < .05
names(ofDepthMultComp) = ofavDepthComps
ofDepthLet = multcompLetters(ofDepthMultComp)
ofDepthLetters = ofDepthLet$Letters %>% as.data.frame()
ofavDepthLetters = data.frame(x = factor(rownames(ofDepthLetters)), y = 5, grp = ofDepthLetters$.)
ofavLineageViolinA = ggplot(data = subset(ofavPcangsd, subset = ofavPcangsd$cluster!="Admixed"), aes(x = cluster, y = depthm)) +
annotate(geom = "rect", xmin = -Inf, xmax = Inf, ymin = 30, ymax = Inf, fill = "black", alpha = 0.10, color = NA) +
geom_violin(aes(fill = cluster),adjust = 1, linewidth = 0, color = "black", alpha = 0.75, width = 0.9, trim = F, scale = "area") +
geom_beeswarm(aes(fill = cluster), shape = 21, size = 2, cex = 1.5, alpha = 0.5) +
geom_violin(adjust = 1, linewidth = 0.4, color = "black", alpha = 1, width = 0.9, trim = F, fill = NA, scale = "area") +
geom_boxplot(width = 0.1, color = "black", fill = "white", outlier.colour = NA, linewidth = 0.6, alpha = 0.5) +
geom_segment(data = ticks[c(3:13),], aes(x = x, xend = xend, y = y)) +
annotate(geom = "text", x = 2.75, y =65, label = bquote(italic("F")~" = "~.(ofavDF)*"; "~italic("p")~" < 0.001"), size = 3) +
annotate(geom = "text", x = ofavDepthLetters$x, y = ofavDepthLetters$y, label = ofavDepthLetters$grp) +
scale_fill_discrete(type = ofavKColPal[c(1,3,2)], name = "Lineage") +
scale_color_discrete(type = ofavKColPal[c(1,3,2)], name = "Lineage") +
xlab("Lineage") +
ylab("Depth (m)") +
scale_y_reverse(breaks = seq(5, 90, 5)) +
coord_cartesian(xlim = c(0.75, 3.25), clip = "off") +
theme_bw()
ofavLineageViolin = ofavLineageViolinA + violinTheme
ofavLineageViolin
Measuring with global weighted FST calculated from SFS
First prepare and sort FST for plotting
of.pops = c("Of_Deep1", "Of_Deep2", "Of_Shal")
# reads in fst
ofFstMa1 <- read.delim("../data/ofav/ofavKFst.out") %>% dplyr::select(-fst) %>% df_to_pw_mat(., "pop1", "pop2", "weightedFst")
colnames(ofFstMa1) = of.pops
rownames(ofFstMa1) = of.pops
ofFstMa = ofFstMa1
upperTriangle(ofFstMa, byrow = TRUE) <- lowerTriangle(ofFstMa)
ofFstMa <- ofFstMa[,of.pops] %>%
.[of.pops,]
ofFstMa[lower.tri(ofFstMa)] <- NA
ofFstMa <- as.data.frame(ofFstMa)
# rearrange and reformat matrix
ofFstMa$Pop = factor(row.names(ofFstMa), levels = unique(of.pops))
# melt matrix data (turn from matrix into long dataframe)
ofFst = melt(ofFstMa, id.vars = "Pop", value.name = "Fst", variable.name = "Pop2", na.rm = FALSE)
ofFst$Fst = round(ofFst$Fst, 3)
ofFst$site = ofFst$Pop
ofFst$site = factor(ofFst$site)
ofFst$site2 = ofFst$Pop2
Construct a heatmap of FST values
ofFstHeatmapA = ggplot(data = ofFst %>% filter(Fst != 0), aes(Pop, Pop2, fill = as.numeric(as.character(Fst)))) +
geom_tile(color = "white") +
geom_segment(data = ofFst, aes(x = 0.475, xend = 0.25, y = Pop2, yend = Pop2, color = site2), size = 21.25) + #colored boxes for Y-axis labels
geom_segment(data = ofFst, aes(x = Pop, xend = Pop, y = 0.2, yend = 0.475, color = site), size = 41) + #colored boxes for X-axis labels
scale_color_manual(values = ofavKColPal[c(1,3,2)], guide = NULL) +
scale_fill_gradient(low = "white", high = "#EA526F", limit = c(0, 0.24), space = "Lab", name = expression(paste(italic("F")[ST])), na.value = NA, guide = "colourbar") +
geom_text(data = ofFst %>% filter(Fst != 0), aes(Pop, Pop2, label = Fst), color = "black", size = 3.5, fontface = "bold") +
guides(fill = guide_colorbar(barwidth = 7.5, barheight = 0.75, title.position = "top", title.hjust = 0.5, direction = "horizontal", ticks.colour = "black", frame.colour = "black")) +
scale_y_discrete(position = "left", limits = rev(levels(ofFst$Pop2))) +
scale_x_discrete(limits = levels(ofFst$Pop2)[c(1:3)]) +
coord_cartesian(xlim = c(1, 3), ylim = c(1, 3), clip = "off") +
theme_minimal()
ofFstHeatmap = ofFstHeatmapA + theme(
axis.text.x = element_text(vjust = 3.5, size = 10, hjust = 0.5, color = "black"),
axis.text.y = element_text(size = 10, color = "black", angle = 90, hjust = 0.5, vjust = -1.5),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
panel.grid.major = element_blank(),
panel.border = element_blank(),
axis.ticks = element_blank(),
legend.title = element_text(size = 8, color = "black"),
legend.text = element_text(size = 8, color = "black"),
legend.position = c(0.6, 0.9),
plot.background = element_blank(),
panel.background = element_blank(),
)
ofFstHeatmap
ofavData = read.csv("../data/nwgomMetaData.csv") %>% dplyr::filter(species == "O. faveolata") %>% dplyr::filter(!sample %in% c("OGM003", "OGM043", "OGM075", "OGM076", "OGM121", "OGM126", "OGM108.2", "OGM108.3", "OGM024.3", "OGM024.1", "OGM017", "OGM066.2", "OGM066.1", "OGM071", "OGM072", "OGM016", "OGM009", "OGM077", "OGM086", "OGM013", "OGM115", "OGM118", "OGM012", "OGM124", "OGM005"))
ofavHetData = read.delim("../data/ofav/ofavHet.out", header = FALSE, sep = " ") %>% mutate(sample = ofavData$sample) %>% rename("Het" = "V2") %>% left_join(ofavPcangsd) %>% select(sample, Het, cluster)
## Joining with `by = join_by(sample)`
ofavHet = ofavData %>% left_join(ofavHetData)
## Joining with `by = join_by(sample)`
ofavHetAOV = welch_anova_test(ofavHet, Het ~ cluster)
ofavHetPH = games_howell_test(ofavHet, Het ~ cluster)
ofavHetPH$p.adj
## [1] 0.952 0.015 0.744
ofavComps = paste(ofavHetPH$group1,"-",ofavHetPH$group2, sep ="")
ofavPs = ofavHetPH$p.adj
ofMultComp = ofavPs < .05
names(ofMultComp) = ofavComps
ofLet = multcompLetters(ofMultComp)
ofLetters = ofLet$Letters %>% as.data.frame()
ofavHetLetters = data.frame(x = factor(rownames(ofLetters)), y = 0.0024, grp = ofLetters$.)
ofavFST = read.delim("../data/ofav/ofavKFst.out")
ofavFST$y = c(0.0025, 0.0031, 0.0028)
ofavHetPlot = ggplot(data = ofavHet, aes(x = cluster, y = Het)) +
geom_violin(aes(fill = cluster, group = cluster), adjust = 1, linewidth = 0.4, color = "black", alpha = 1, trim = FALSE, scale = "area", width = 1) +
geom_beeswarm(aes(fill = cluster), shape = 21, cex = 0.5) +
geom_boxplot(aes(fill = cluster, group = cluster), width = 0.2, color = "black", fill = "white", outlier.colour = NA, linewidth = 0.4, alpha = 0.5) + xlab("Lineage") +
geom_text(data = ofavHetLetters, aes(x = x, y = y, label = grp), size = 4) +
annotate(geom = "text", x = 2.85, y = 0, label = bquote(italic("F")~" = "~.(ofavHetAOV$statistic)*"; "~italic("p")~" = "~.(ofavHetAOV$p)), size = 3) +
geom_segment(data = ofavFST, aes(y = y, x = pop1, xend = pop2)) +
annotate(geom = "text", x = 1.5, y = 0.0026, label = bquote(italic("F")["ST"]~" = "~.(round(ofavFST$weightedFst[1], 3)))) +
annotate(geom = "text", x = 2, y = 0.0032, label = bquote(italic("F")["ST"]~" = "~.(round(ofavFST$weightedFst[3], 3)))) +
annotate(geom = "text", x = 2.5, y = 0.0029, label = bquote(italic("F")["ST"]~" = "~.(round(ofavFST$weightedFst[2], 3)))) +
scale_fill_discrete(type = ofavKColPal[c(1,3,2)], name = "Lineage") +
xlab("Lineage") +
ylab("Heterozygosity") +
scale_y_continuous(breaks = seq(0, 0.0035, 0.0005)) +
coord_cartesian(expand = TRUE, xlim = c(0.85, 3.15), ylim = c(0, 0.0032)) +
theme_bw() +
violinTheme +
theme(panel.border = element_rect(linewidth = 1),
axis.ticks.y = element_line(color = "black"))
ofavHetPlot
ofavNpgList = list(read_tsv("../data/ofav/Ofav_Deep1.thetas.idx.pestPG") %>% mutate(lineage = "Of_Deep1"),
read_tsv("../data/ofav/Ofav_Deep2.thetas.idx.pestPG") %>% mutate(lineage = "Of_Deep2"),
read_tsv("../data/ofav/Ofav_Shallow.thetas.idx.pestPG") %>% mutate(lineage = "Of_Shallow"))
## Rows: 25 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (2): #(indexStart,indexStop)(firstPos_withData,lastPos_withData)(WinStart,WinSt...
## dbl (12): WinCenter, tW, tP, tF, tH, tL, Tajima, fuf, fud, fayh, zeng, nSites
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 25 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (2): #(indexStart,indexStop)(firstPos_withData,lastPos_withData)(WinStart,WinSt...
## dbl (12): WinCenter, tW, tP, tF, tH, tL, Tajima, fuf, fud, fayh, zeng, nSites
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 25 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (2): #(indexStart,indexStop)(firstPos_withData,lastPos_withData)(WinStart,WinSt...
## dbl (12): WinCenter, tW, tP, tF, tH, tL, Tajima, fuf, fud, fayh, zeng, nSites
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
ofavPiAll = purrr::reduce(ofavNpgList, rbind) %>%
dplyr::group_by(lineage) %>%
filter(tP != 0) %>%
mutate(tPps = tP/nSites) %>%
dplyr::summarize(tPps = mean(tPps))
ofavPiAll$Ne = (ofavPiAll$tPps)/(4*2e-8)
ofavPiAll
## # A tibble: 3 × 3
## lineage tPps Ne
## <chr> <dbl> <dbl>
## 1 Of_Deep1 0.00243 30336.
## 2 Of_Deep2 0.00244 30499.
## 3 Of_Shallow 0.00240 29953.
ofavPiAll$lineage = factor(ofavPiAll$lineage)
ofavNuclDivPlot = ggplot(ofavPiAll, aes(x = lineage, y = tPps, fill = lineage, group = lineage)) +
geom_bar(position = position_dodge2(preserve = "single"), stat = "identity", color = "black") +
scale_fill_manual(values = ofavKColPal[c(1,3,2)]) +
geom_text(position = position_dodge2(preserve = "single", width = 0.9), aes(y = tPps-.0015, label= scales::comma(round(Ne,0)), hjust = 0, fontface = "bold"), angle = 90, color = "black", ) +
labs(x = "Lineage", y = bquote("Nucleotide diversity ("*pi*")")) +
theme_bw() +
violinTheme +
theme(axis.text.x = ggtext::element_markdown())
ofavNuclDivPlot
ofavData = read.csv("../data/nwgomMetaData.csv") %>% dplyr::filter(species == "O. faveolata") %>% dplyr::filter(!sample %in% c("OGM003", "OGM043", "OGM075", "OGM076", "OGM121", "OGM126", "OGM108.2", "OGM108.3", "OGM024.3", "OGM024.1", "OGM017", "OGM066.2", "OGM066.1", "OGM071", "OGM072", "OGM016", "OGM009", "OGM077", "OGM086", "OGM013", "OGM115", "OGM118", "OGM012", "OGM124", "OGM005")) %>% dplyr::select("sample" = sample, "site" = bank, "depth" = depthZone)
ofavZoox = read.delim("../data/ofav/ofavZooxReads", header = FALSE, check.names = FALSE)
head(ofavZoox)
## V1 V2
## 1 OGM001.trim.zoox.bt2.bam NA
## 2 chr1 38
## 3 chr2 46
## 4 chr3 55
## 5 chr4 23
## 6 chr5 116
# Reconstruct read mapping output into dataframe usable for analysis
ofavZoox$V2[is.na(ofavZoox$V2)] <- as.character(ofavZoox$V1[is.na(ofavZoox$V2)])
ofavZoox$V1 = gsub(pattern = "MGM*.", "chr", ofavZoox$V1)
ofavZoox$V2 = gsub(".trim.*", "", ofavZoox$V2)
ofavZoox = ofavZoox %>% filter(ofavZoox$V1 != "*")
ofavZooxLst = split(ofavZoox$V2, as.integer(gl(length(ofavZoox$V2), 20, length(ofavZoox$V2))))
ofavZooxMaps = NULL
for(i in ofavZooxLst){
ofavZooxMaps = rbind(ofavZooxMaps, data.frame(t(i)))
}
# remove tech reps
ofavZooxMaps = ofavZooxMaps %>% dplyr::filter(!X1 %in% c("OGM003", "OGM043", "OGM075", "OGM076", "OGM121", "OGM126", "OGM108.2", "OGM108.3", "OGM024.3", "OGM024.1", "OGM017", "OGM066.2", "OGM066.1", "OGM071", "OGM072", "OGM016", "OGM009", "OGM077", "OGM086", "OGM013", "OGM115", "OGM118", "OGM012", "OGM124", "OGM005"))
colnames(ofavZooxMaps) = c("sample",ofavZoox$V1[c(2:20)])
# convert characters to numeric
str(ofavZooxMaps)
## 'data.frame': 116 obs. of 20 variables:
## $ sample: chr "OGM001" "OGM002" "OGM004" "OGM006" ...
## $ chr1 : chr "38" "30" "161" "45" ...
## $ chr2 : chr "46" "41" "354" "173" ...
## $ chr3 : chr "55" "19" "301" "214" ...
## $ chr4 : chr "23" "30" "173" "220" ...
## $ chr5 : chr "116" "88" "149" "170" ...
## $ chr6 : chr "503" "821" "28912" "5009" ...
## $ chr7 : chr "623" "1022" "32166" "5925" ...
## $ chr8 : chr "654" "1068" "31717" "6158" ...
## $ chr9 : chr "62" "102" "1448" "323" ...
## $ chr10 : chr "544" "1132" "1268" "16348" ...
## $ chr11 : chr "30" "62" "318" "515" ...
## $ chr12 : chr "54" "95" "257" "305" ...
## $ chr13 : chr "19" "61" "189" "167" ...
## $ chr14 : chr "885" "1126" "2376" "1586" ...
## $ chr15 : chr "22" "56" "86" "398" ...
## $ chr16 : chr "170" "408" "604" "3964" ...
## $ chr17 : chr "52" "113" "292" "1236" ...
## $ chr18 : chr "48" "162" "317" "1608" ...
## $ chr19 : chr "10" "45" "42" "445" ...
for(i in c(2:20)){
ofavZooxMaps[,i] = as.numeric(ofavZooxMaps[,i])
}
str(ofavZooxMaps)
## 'data.frame': 116 obs. of 20 variables:
## $ sample: chr "OGM001" "OGM002" "OGM004" "OGM006" ...
## $ chr1 : num 38 30 161 45 193 112 300 223 74 78 ...
## $ chr2 : num 46 41 354 173 251 319 415 365 216 96 ...
## $ chr3 : num 55 19 301 214 354 250 464 342 168 130 ...
## $ chr4 : num 23 30 173 220 284 95 191 239 129 123 ...
## $ chr5 : num 116 88 149 170 157 186 144 295 141 144 ...
## $ chr6 : num 503 821 28912 5009 25205 ...
## $ chr7 : num 623 1022 32166 5925 28709 ...
## $ chr8 : num 654 1068 31717 6158 28100 ...
## $ chr9 : num 62 102 1448 323 1185 ...
## $ chr10 : num 544 1132 1268 16348 3836 ...
## $ chr11 : num 30 62 318 515 455 294 471 292 252 195 ...
## $ chr12 : num 54 95 257 305 315 309 453 354 198 201 ...
## $ chr13 : num 19 61 189 167 186 247 378 321 138 79 ...
## $ chr14 : num 885 1126 2376 1586 2498 ...
## $ chr15 : num 22 56 86 398 181 91 181 162 106 141 ...
## $ chr16 : num 170 408 604 3964 1161 ...
## $ chr17 : num 52 113 292 1236 570 ...
## $ chr18 : num 48 162 317 1608 572 ...
## $ chr19 : num 10 45 42 445 88 64 112 118 80 97 ...
# collapse fake chromosomes into representative genera
ofavZooxMaps$Symbiodinium = rowSums(ofavZooxMaps[2:6])
ofavZooxMaps$Breviolum = rowSums(ofavZooxMaps[7:10])
ofavZooxMaps$Cladocopium = rowSums(ofavZooxMaps[11:16])
ofavZooxMaps$Durusdinium = rowSums(ofavZooxMaps[17:20])
# keep genera totals and turn into proportions for barplot
ofavZooxMaps = ofavZooxMaps[,c(1, 21:24)]
ofavZooxProp = ofavZooxMaps
ofavZooxProp$sum = apply(ofavZooxProp[, c(2:length(ofavZooxProp[1,]))], 1, function(x) {
sum(x, na.rm = T)
})
ofavZooxProp = cbind(ofavZooxProp$sample, (ofavZooxProp[, c(2:(ncol(ofavZooxProp)-1))]
/ ofavZooxProp$sum))
colnames(ofavZooxProp)[1] = "sample"
head(ofavZooxProp)
## sample Symbiodinium Breviolum Cladocopium Durusdinium
## 1 OGM001 0.07030855 0.4658574 0.39301973 0.07081437
## 2 OGM002 0.03209381 0.4648974 0.39068045 0.11232834
## 3 OGM004 0.01125284 0.9318995 0.04443785 0.01240977
## 4 OGM006 0.01834453 0.3886496 0.43114107 0.16186480
## 5 OGM007 0.01313892 0.8822800 0.07922587 0.02535525
## 6 OGM008 0.04146373 0.6417827 0.25279083 0.06396276
# Check that all samples total to 1
apply(ofavZooxProp[, c(2:(ncol(ofavZooxProp)))], 1, function(x) {
sum(x, na.rm = T)
})
## [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [42] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [83] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
# add sample metadata to proportions
ofavSym = ofavData %>% left_join(ofavZooxProp)
## Joining with `by = join_by(sample)`
ofavAdmixOrd = ofavAdmix %>% dplyr::select(sample, ord)
ofavPcangsdITS =ofavPcangsd %>% dplyr::select(sample, cluster)
ofavSym = ofavSym %>% left_join(ofavAdmixOrd) %>% relocate(ord,.after = depth) %>% left_join(ofavPcangsdITS) %>% relocate(cluster, .after = depth)
## Joining with `by = join_by(sample)`
## Joining with `by = join_by(sample)`
ofavZooxPermData = ofavData %>% left_join(ofavZooxMaps) %>% left_join(ofavPcangsdITS) %>% relocate(cluster, .after = depth)
## Joining with `by = join_by(sample)`
## Joining with `by = join_by(sample)`
ofavZooxPermData$depth = factor(ofavZooxPermData$depth)
ofavZooxPermData$depth = factor(ofavZooxPermData$depth, levels = levels(ofavZooxPermData$depth)[c(2, 1)])
ofavZooxPermData$site = factor(ofavZooxPermData$site)
ofavZooxPermData$site = factor(ofavZooxPermData$site, levels = levels(ofavZooxPermData$site)[c(2, 1)])
head(ofavZooxPermData)
## sample site depth cluster Symbiodinium Breviolum Cladocopium Durusdinium
## 1 OGM001 EFGB Mesophotic Of_Deep1 278 1842 1554 280
## 2 OGM002 EFGB Mesophotic Of_Deep1 208 3013 2532 728
## 3 OGM004 EFGB Mesophotic Of_Deep2 1138 94243 4494 1255
## 4 OGM006 EFGB Mesophotic Of_Deep1 822 17415 19319 7253
## 5 OGM007 EFGB Mesophotic Of_Deep1 1239 83199 7471 2391
## 6 OGM008 EFGB Mesophotic Of_Deep1 962 14890 5865 1484
set.seed(694)
ofavSymPerm = adonis2(decostand(ofavZooxPermData[ c(5:((ncol(ofavZooxPermData))))], "normalize") ~ depth * site, data = ofavZooxPermData, permutations = 9999, method = "bray", sqrt.dist = TRUE, by = "terms")
ofavSymPerm
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = decostand(ofavZooxPermData[c(5:((ncol(ofavZooxPermData))))], "normalize") ~ depth * site, data = ofavZooxPermData, permutations = 9999, method = "bray", sqrt.dist = TRUE, by = "terms")
## Df SumOfSqs R2 F Pr(>F)
## depth 1 0.3066 0.01720 2.1432 0.0815 .
## site 1 0.5305 0.02977 3.7091 0.0158 *
## depth:site 1 0.9613 0.05395 6.7204 0.0010 ***
## Residual 112 16.0201 0.89907
## Total 115 17.8185 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
O. faveolata lineage vs Symbiodiniaceae B-C distance
ofavZooxDist = ofavZooxPermData[ c(5:ncol(ofavZooxPermData))] %>% decostand("normalize") %>% vegdist(method = "bray")
ofavZooxPCoA = cmdscale(ofavZooxDist, eig = TRUE, x.ret = TRUE)
ofavAdmixDist = ofavAdmixK2[c(5:ncol(ofavAdmixK2))] %>% vegdist(method = "euclidean")
ofavAdmixPCoA = cmdscale(ofavAdmixDist, eig = TRUE, x.ret = TRUE)
set.seed(981)
ofavProcrustes = protest(X = ofavAdmixPCoA, Y = ofavZooxPCoA , permutations = 9999)
ofavProcrustes
##
## Call:
## protest(X = ofavAdmixPCoA, Y = ofavZooxPCoA, permutations = 9999)
##
## Procrustes Sum of Squares (m12 squared): 0.9907
## Correlation in a symmetric Procrustes rotation: 0.09623
## Significance: 0.3094
##
## Permutation: free
## Number of permutations: 9999
ofavSym$depth = factor(ofavSym$depth)
ofavSym$depth = factor(ofavSym$depth, levels = levels(ofavSym$depth)[c(2, 1)])
ofavSym$site = factor(ofavSym$site)
ofavSym$site = factor(ofavSym$site, levels = levels(ofavSym$site)[c(5, 2, 1, 3, 4)])
#turn into melted dataframe with otustack() and remove "summ" rows
ofavGssSym = otuStack(ofavSym, count.columns = c(6:length(ofavSym[1, ])),
condition.columns = c(1:5)) %>% filter(otu != "summ") %>% droplevels()
#check that levels are correct/ordered
levels(ofavGssSym$otu)
## [1] "Symbiodinium" "Breviolum" "Cladocopium" "Durusdinium"
levels(ofavGssSym$depth)
## [1] "Shallow" "Mesophotic"
levels(ofavGssSym$site)
## [1] "WFGB" "EFGB"
ofavZooxAnno = data.frame(x1 = c(0.5, 0.5), x2 = c(33.5, 33.5),
y1 = -0.22, y2 = -0.22, site = c("WFGB", "EFGB"))
ofavZooxAnno$site = factor(ofavZooxAnno$site)
ofavZooxAnno$site = factor(ofavZooxAnno$site, levels = levels(ofavZooxAnno$site)[c(5, 2, 1, 3, 4)])
ofavGssSymPlot = ofavGssSym %>% left_join(ofavZooxAnno, by = "site") %>% left_join(ofavPcangsdITS)
## Joining with `by = join_by(sample, cluster)`
ofavGssSymPlot$ord = as.numeric(ofavGssSymPlot$ord)
ofavZooxPlotA = ggplot(data = ofavGssSymPlot, aes(x = ord, y = count, fill = otu, order = ord)) +
geom_point(aes(x=1, y=0.5, fill = otu), shape = 22, size = 0) +
geom_bar(stat = "identity", position = "stack", colour = "grey25", width = 1, size = 0.2, show.legend = FALSE) +
xlab("Population") +
scale_fill_manual(values = colPalZoox, name = "Symbiodiniaceae \ngenus") +
geom_segment(data = (ofavGssSymPlot %>% filter(depth == "Mesophotic")), aes(x = x1, xend = x2, y = y1, yend = y2, color = site), size = 7) +
scale_color_manual(values = gomPal, guide = "none") +
ggnewscale::new_scale_color() +
geom_segment(data = ofavGssSymPlot, aes(x = ord-0.5, xend = ord+0.5, color = cluster), y = -.07, yend = -.07, linewidth = 4) +
scale_color_manual(values = ofavKColPal[c(1,3,2,7)], name = "Lineage") +
coord_cartesian(ylim = c(0, 1), xlim = c(0.5, 33.5), clip = "off") +
scale_x_discrete(expand = c(0.005, 0.005)) +
scale_y_continuous(expand = c(0.001, 0.001)) +
facet_grid(factor(depth) ~ site, drop = TRUE, scales = "free", switch = "both", space = "free") +
geom_text(data = subset(ofavGssSymPlot, subset = otu == "Symbiodinium") %>% filter(sample %in% c("OGM001", "OGM073")), x = 15.5, y = -.205, aes(label = site), size = 4.4, color = "#000000") +
guides(fill = guide_legend(override.aes = list(size = 4), ncol = 1, order = 1), color = guide_legend(override.aes = list(size = 0.1), ncol = 1)) +
theme_bw()
ofavZooxPlot = ofavZooxPlotA + zooxTheme
ofavZooxPlot
ofavPlot1 = ofavStructure + inset_element(ofavImg, -0.1, 0.7, 0.3, 1, align_to = "full", ignore_tag = TRUE)
ofavPlot2 = (ofavPcaPlots) + plot_layout(guides = "collect")
ofavPlot3 = (ofavLineageViolin + theme(axis.text.y = element_text(margin = ggplot2::margin(r = -15, unit = "pt")))) + ofavHetPlot + ofavNuclDivPlot
ofavPlot4 = ofavAdmixPlot + ofavZooxPlot
ofavPlots = ofavPlot1 + ofavPlot2 + ofavPlot3 + ofavPlot4 +
plot_layout(heights = c(2,1,1,1)) +
plot_annotation(tag_levels = 'A') &
theme(plot.tag = element_text(size = 18),
legend.spacing = unit(-5, "pt"),
legend.key = element_blank(),
legend.background = element_blank())
ggsave("../figures/figure4.png", plot = ofavPlots, height = 14, width = 12, units = "in", dpi = 300)
ggsave("../figures/figure4.svg", plot = ofavPlots, height = 14, width = 12, units = "in", dpi = 300)
xestoBams = read.csv("../data/nwgomMetaData.csv") %>% dplyr::filter(species == "X. muta") %>% dplyr::filter(!sample %in% c("XGM193.2", "XGM193.3", "XGM203.1", "XGM203.3", "XGM158.1", "XGM158.2", "XGM149", "XGM118", "XGM132", "XGM117", "XGM099", "XGM110", "XGM179.2", "XGM179.3", "XGM072.1", "XGM072.3", "XGM034.1", "XGM034.3", "XGM013", "XGM067", "XGM129"))# list of bams files and their populations (tech reps removed)
xestoBams$sample <- gsub("\\.[1-3]*$", "", xestoBams$sample)
xestoBams$bank = factor(xestoBams$bank)
xestoBams$bank = factor(xestoBams$bank, levels = levels(xestoBams$bank)[c(5, 2, 1, 3, 4)])
xestoBams$depthZone = factor(xestoBams$depthZone)
xestoBams$depthZone = factor(xestoBams$depthZone, levels = levels(xestoBams$depthZone)[c(2, 1)])
xestoMa = as.matrix(read.table("../data/xesto/xestoNoClones.ibsMat")) # reads in IBS matrix produced by ANGSD
dimnames(xestoMa) = list(xestoBams[,3],xestoBams[,3])
## admixture K = 2
#-------------------------------------
xestoK2ad = read.table("../data/xesto/admix/xestoK2.output") %>% dplyr::select(V6, V7)
xestoK2ad %>% summarise(sum(V6),sum(V7))
## sum(V6) sum(V7)
## 1 116.3335 80.6665
xestoAdmixK2 = xestoBams %>%
dplyr::select(sample, bank, "depth" = depthZone, "depthm" = depthM) %>%
cbind(xestoK2ad) %>%
dplyr::rename("Xm1" = "V6", "Xm2" = "V7")
xestoAdmixK2_melt = melt(xestoAdmixK2, id = c("sample", "bank", "depth", "depthm"))
## admixture K = 3
#-------------------------------------
xestoK3ad = read.table("../data/xesto/admix/xestoK3.output") %>% dplyr::select(V6, V7, V8)
xestoK3ad %>% summarise(sum(V6),sum(V7), sum(V8))
## sum(V6) sum(V7) sum(V8)
## 1 66.6275 75.9908 54.3813
xestoAdmixK3 = xestoBams %>%
dplyr::select(sample, bank, "depth" = depthZone, "depthm" = depthM) %>%
cbind(xestoK3ad) %>%
dplyr::rename("Xm1" = "V6", "Xm2" = "V7", "Xm3" = "V8") %>%
dplyr::select(order(colnames(.)))
xestoAdmixK3_melt = melt(xestoAdmixK3, id = c("sample", "bank", "depth", "depthm"))
## admixture K = 4
#-------------------------------------
xestoK4ad = read.table("../data/xesto/admix/xestoK4.output") %>% dplyr::select(V6, V7, V8, V9)
xestoK4ad %>% summarise(sum(V6),sum(V7), sum(V8), sum(V9))
## sum(V6) sum(V7) sum(V8) sum(V9)
## 1 42.6422 72.7028 53.2008 28.4541
xestoAdmixK4 = xestoBams %>%
dplyr::select(sample, bank, "depth" = depthZone, "depthm" = depthM) %>%
cbind(xestoK4ad) %>%
dplyr::rename("Xm1" = "V6", "Xm2" = "V9", "Xm3" = "V7", "Xm4" = "V8") %>%
dplyr::select(order(colnames(.)))
xestoAdmixK4_melt = melt(xestoAdmixK4, id = c("sample", "bank", "depth", "depthm"))
## admixture K = 5
#-------------------------------------
xestoK5ad = read.table("../data/xesto/admix/xestoK5.output") %>% dplyr::select(V6, V7, V8, V9, V10)
xestoK5ad %>% summarise(sum(V6),sum(V7), sum(V8), sum(V9), sum(V10))
## sum(V6) sum(V7) sum(V8) sum(V9) sum(V10)
## 1 38.9413 39.2971 51.6064 28.8596 38.2956
xestoAdmixK5 = xestoBams %>%
dplyr::select(sample, bank, "depth" = depthZone, "depthm" = depthM) %>%
cbind(xestoK5ad) %>%
dplyr::rename("Xm1" = "V6", "Xm2" = "V9", "Xm3" = "V10", "Xm4" = "V7", "Xm5" = "V8") #%>%
xestoAdmixK5_melt = melt(xestoAdmixK5, id = c("sample", "bank", "depth", "depthm"))
## admixture K = 6
#-------------------------------------
xestoK6ad = read.table("../data/xesto/admix/xestoK6.output") %>% dplyr::select(V6, V7, V8, V9, V10,V11)
xestoK6ad %>% summarise(sum(V6),sum(V7), sum(V8), sum(V9), sum(V10), sum(V11))
## sum(V6) sum(V7) sum(V8) sum(V9) sum(V10) sum(V11)
## 1 23.1583 33.5617 48.5514 22.3351 41.7451 27.6487
xestoAdmixK6 = xestoBams %>%
dplyr::select(sample, bank, "depth" = depthZone, "depthm" = depthM) %>%
cbind(xestoK6ad) %>%
dplyr::rename("Xm1" = "V6", "Xm2" = "V9", "Xm3" = "V10", "Xm4" = "V11", "Xm5" = "V7", "Xm6" = "V8") #%>%
xestoAdmixK6_melt = melt(xestoAdmixK6, id = c("sample", "bank", "depth", "depthm"))
## construct figure
#-------------------------------------
# ggtree(tr) + geom_nodelab(aes(label = node), hjust = -0.5)
{
xestoTr = hclust(dist(xestoMa),"ave")
xestoP1 = ggtree(xestoTr, layout = "rectangular", size = 0.35)
xestoP2 = facet_plot(xestoP1, panel = "location", data=xestoAdmixK2, geom = geom_tile, mapping = aes(x = 1, fill = bank), color = "grey25", size = 0.1) +
scale_fill_manual("Site", values = gomPal, guide = guide_legend(order = 1)) +
new_scale_fill()
xestoP3 = facet_plot(xestoP2, panel = "depth zone", data=xestoAdmixK2, geom = geom_tile, mapping = aes(x = 1, fill = depth), color = "grey25", size = 0.1) +
scale_fill_manual("Depth zone", values = c("#A7E1BCFF", "#414083FF"), guide = guide_legend(order = 2)) +
new_scale_fill()
xestoP4 = facet_plot(xestoP3, panel = "depth", data=xestoAdmixK2, geom = geom_tile, mapping = aes(x = 1, fill = depthm), color = "grey25", size = 0.1) +
scale_fill_viridis_c("Depth (m)", option = "mako", trans = "reverse", limits = c(60, 10)) +
new_scale_fill()
xestoP5 = facet_plot(xestoP4, panel="K = 2", data=xestoAdmixK2_melt, geom=geom_barh, width = 1.0, mapping = aes(x = value, fill = variable), stat='identity', color = "grey25", size = 0.1, show.legend = FALSE) +
scale_fill_manual("Lineage",values = xestoKColPal[1:6])
xestoP6 = facet_plot( xestoP5, panel="K = 3", data=xestoAdmixK3_melt, geom=geom_barh, width = 1.0, mapping = aes(x = value, fill = variable), stat='identity', color = "grey25", size = 0.1, show.legend = FALSE)
xestoP7 = facet_plot(xestoP6, panel="K = 4", data=xestoAdmixK4_melt, geom=geom_barh, width = 1.0, mapping = aes(x = value, fill = variable), stat='identity', color = "grey25", size = 0.1, show.legend = FALSE)
xestoP8 = facet_plot(xestoP7, panel="K = 5", data=xestoAdmixK5_melt, geom=geom_barh, width = 1.0, mapping = aes(x = value, fill = variable), stat='identity', color = "grey25", size = 0.1, show.legend = FALSE)
xestoP9 = facet_plot(xestoP8, panel="K = 6", data=xestoAdmixK6_melt, geom=geom_barh, width = 1.0, mapping = aes(x = value, fill = variable), stat='identity', color = "grey25", size = 0.1, show.legend = FALSE) +
theme_tree(strip.text = element_blank(),
panel.spacing = unit(.1, "line"))
}
xestoImg = image_read("../figures/icons/xesto.png") %>% image_ggplot()
xestoStructure = facet_widths(xestoP9, widths = c(0.5, 0.025, 0.025, 0.025, 0.1, 0.1, 0.1, 0.1, 0.2))
xestoStructure
xestoData = read.csv("../data/nwgomMetaData.csv") %>% dplyr::filter(species == "X. muta") %>% dplyr::filter(!sample %in% c("XGM193.2", "XGM193.3", "XGM203.1", "XGM203.3", "XGM158.1", "XGM158.2", "XGM149", "XGM118", "XGM132", "XGM117", "XGM099", "XGM110", "XGM179.2", "XGM179.3", "XGM072.1", "XGM072.3", "XGM034.1", "XGM034.3", "XGM013", "XGM067", "XGM129"))
xestoData$depthZone = factor(xestoData$depthZone)
xestoData$depthZone = factor(xestoData$depthZone, levels(xestoData$depthZone)[c(2,1)])
xestoData$bank = factor(xestoData$bank)
xestoData$bank = factor(xestoData$bank,levels(xestoData$bank)[c(5, 2, 1, 4, 3)])
xestoPcadmix = read.table("../data/xesto/admix/xestoK6.output") %>%dplyr::select(V6, V7, V8, V9, V10, V11)
xestoPcadmix %>% summarise(sum(V6), sum(V7), sum(V8), sum(V9), sum(V10), sum(V11))
## sum(V6) sum(V7) sum(V8) sum(V9) sum(V10) sum(V11)
## 1 23.1583 33.5617 48.5514 22.3351 41.7451 27.6487
xestoPcadmix = xestoData %>% cbind(xestoPcadmix) %>% rename("cluster1" = "V6", "cluster2" = "V9", "cluster3" = "V10", "cluster4" = "V11", "cluster5" = "V7", "cluster6" = "V8") %>% dplyr::select(order(colnames(.)))
xestoPcadmixClust = xestoPcadmix %>% mutate(cluster = ifelse(cluster1 < 0.75 & cluster2 < 0.75 & cluster3 < 0.75 & cluster4 < 0.75 & cluster5 < 0.75 & cluster6 < 0.75, "NA", ifelse(cluster1 >=0.75, 1, ifelse(cluster2 >= 0.75, 2, ifelse(cluster3 >=0.75, 3, ifelse(cluster4 >= 0.75, 4, ifelse(cluster5 >=0.75, 5, ifelse(cluster6 >= 0.75, 6, 0))))))))
xestoPcadmix = xestoPcadmix %>% mutate(xestoPcadmixClust)
xestoPcadmix$cluster = as.factor(xestoPcadmix$cluster)
levels(xestoPcadmix$cluster) = c("Xm_Deep1", "Xm_Deep2", "Xm_Deep3", "Xm_Deep4", "Xm_Shal1", "Xm_Shal2", "Admixed")
xestoSiteLineages = xestoPcadmix %>% dplyr::select(depthZone, cluster) %>%
group_by(depthZone) %>% count(cluster) %>% mutate(Freq = n/sum(n)) %>% apply(2, function(x) gsub("\\s+", "", x)) %>% as.data.frame()
xestoSiteLineages
## depthZone cluster n Freq
## 1 Shallow Xm_Shal1 15 0.258620690
## 2 Shallow Xm_Shal2 38 0.655172414
## 3 Shallow Admixed 5 0.086206897
## 4 Mesophotic Xm_Deep1 20 0.143884892
## 5 Mesophotic Xm_Deep2 16 0.115107914
## 6 Mesophotic Xm_Deep3 34 0.244604317
## 7 Mesophotic Xm_Deep4 16 0.115107914
## 8 Mesophotic Xm_Shal1 7 0.050359712
## 9 Mesophotic Xm_Shal2 1 0.007194245
## 10 Mesophotic Admixed 45 0.323741007
xestoCov = read.table("../data/xesto/xestoPcangsd.cov") %>% as.matrix()
xestoPcAdmix = read.table("../data/xesto/admix/xestoK6.output") %>%dplyr::select(V6, V7, V8, V9, V10, V11)
xestoPcAdmix %>% summarise(sum(V6), sum(V7), sum(V8), sum(V9), sum(V10), sum(V11))
## sum(V6) sum(V7) sum(V8) sum(V9) sum(V10) sum(V11)
## 1 23.1583 33.5617 48.5514 22.3351 41.7451 27.6487
# xestoPcAdmix = xestoPcAdmix %>% rename("cluster1" = "V1", "cluster2" = "V2", "cluster3" = "V3", "cluster4" = "V4", "cluster5" = "V5", "cluster6" = "V6", "cluster7" = "V7") %>% dplyr::select(order(colnames(.)))
xestoPcAdmix = xestoPcAdmix %>% rename("Xm_Deep1" = "V6", "Xm_Deep2" = "V9", "Xm_Deep3" = "V10", "Xm_Deep4" = "V11", "Xm_Shal1" = "V7", "Xm_Shal2" = "V8") %>% dplyr::select(order(colnames(.)))
xestoPcaEig = eigen(xestoCov)
xestoPcaVar = xestoPcaEig$values/sum(xestoPcaEig$values)*100
head(xestoPcaVar)
## [1] 7.9030025 3.5636044 2.6138877 2.1696192 1.8047309 0.8699737
xestoPcangsd = read.csv("../data/nwgomMetaData.csv") %>% dplyr::filter(species == "X. muta") %>% dplyr::filter(!sample %in% c("XGM193.2", "XGM193.3", "XGM203.1", "XGM203.3", "XGM158.1", "XGM158.2", "XGM149", "XGM118", "XGM132", "XGM117", "XGM099", "XGM110", "XGM179.2", "XGM179.3", "XGM072.1", "XGM072.3", "XGM034.1", "XGM034.3", "XGM013", "XGM067", "XGM129")) %>% dplyr::select("sample" = sample, "bank" = bank, "depth" = depthZone, "depthm" = depthM)
xestoPcangsd$sitedepth = as.factor(paste(xestoPcangsd$bank, xestoPcangsd$depth, sep = " "))
xestoPcangsd$sitedepth = factor(xestoPcangsd$sitedepth, levels(xestoPcangsd$sitedepth)[c(7, 6, 3, 2, 1, 4, 5)])
xestoPcangsd$bank = factor(xestoPcangsd$bank)
xestoPcangsd$bank = factor(xestoPcangsd$bank, levels(xestoPcangsd$bank)[c(5, 2, 1, 3, 4)])
xestoPcangsd$depth = factor(xestoPcangsd$depth)
xestoPcangsd$depth = factor(xestoPcangsd$depth, levels(xestoPcangsd$depth)[c(2, 1)])
xestoPcangsd$PC1 = xestoPcaEig$vectors[,1]
xestoPcangsd$PC2 = xestoPcaEig$vectors[,2]
xestoPcangsd$PC3 = xestoPcaEig$vectors[,3]
xestoPcangsd$PC4 = xestoPcaEig$vectors[,4]
xestoPcangsdClust = xestoPcAdmix %>% mutate(cluster = ifelse(Xm_Deep1 < 0.75 & Xm_Deep2 < 0.75 & Xm_Deep3 < 0.75 & Xm_Deep4 < 0.75 & Xm_Shal1 < 0.75 & Xm_Shal2 < 0.75, "Admixed", ifelse(Xm_Deep1 >=0.75, "Xm_Deep1", ifelse(Xm_Deep2 >= 0.75, "Xm_Deep2", ifelse(Xm_Deep3 >=0.75, "Xm_Deep3", ifelse(Xm_Deep4 >= 0.75, "Xm_Deep4", ifelse(Xm_Shal1 >=0.75, "Xm_Shal1", ifelse(Xm_Shal2 >= 0.75, "Xm_Shal2", 0))))))))
# pcangsdClust$clusterX = as.vector(d_clust$classification)
xestoPcangsd = xestoPcangsd %>% mutate(xestoPcangsdClust)
xestoPcangsd$cluster = as.factor(xestoPcangsd$cluster)
xestoPcangsd$cluster = factor(xestoPcangsd$cluster, levels = levels(xestoPcangsd$cluster)[c(2:7,1)])
xestoBamsClusters = xestoPcangsd %>% dplyr::select(sample, cluster) %>% dplyr::arrange(sample)
xestoBamssamples = read.delim("../data/xesto/bamsNoClones", header = FALSE)
xestoBamsClusters$sample = xestoBamssamples$V1
# bamsClusters = as.data.frame(bamsClusters)
write.table(x = xestoBamsClusters, file = "../data/xesto/xestoBamsClusters", sep = "\t", row.names = FALSE, col.names = FALSE, quote = FALSE)
xestoPcangsd = merge(xestoPcangsd, aggregate(cbind(mean.x = PC1, mean.y = PC2)~sitedepth, xestoPcangsd, mean), by="sitedepth")
xestoPcangsd %>% dplyr::group_by(depth,cluster) %>% dplyr::summarize(n = n())
## `summarise()` has grouped output by 'depth'. You can override using the `.groups`
## argument.
## # A tibble: 10 × 3
## # Groups: depth [2]
## depth cluster n
## <fct> <fct> <int>
## 1 Shallow Xm_Shal1 15
## 2 Shallow Xm_Shal2 38
## 3 Shallow Admixed 5
## 4 Mesophotic Xm_Deep1 20
## 5 Mesophotic Xm_Deep2 16
## 6 Mesophotic Xm_Deep3 34
## 7 Mesophotic Xm_Deep4 16
## 8 Mesophotic Xm_Shal1 7
## 9 Mesophotic Xm_Shal2 1
## 10 Mesophotic Admixed 45
xestoPcaPlot12SA = ggplot() +
geom_hline(yintercept = 0, color = "gray90", size = 0.25) +
geom_vline(xintercept = 0, color = "gray90", size = 0.25) +
geom_point(data = xestoPcangsd, aes(x = PC1, y = PC2, fill = bank, shape = depth, color = bank), stroke = 0, size = 3, alpha = 0.5, show.legend = FALSE) +
geom_point(data = xestoPcangsd, aes(x = mean.x, y = mean.y, fill = bank, shape = depth), color = "black", size = 3.75, alpha = 1, stroke = 0.25) +
scale_shape_manual(values = c(21, 23), name = "Depth Zone") +
scale_fill_manual(values = gomPal, name = "Site") +
scale_color_manual(values = gomPal, name = "Site") +
labs(x = paste0("PC 1 (", format(round(xestoPcaVar[1], 1), nsmall = 1)," %)"), y = NULL) +
guides(shape = guide_legend(override.aes = list(size = 4, stroke = 0.25, alpha = NA), order = 2, ncol = 1), fill = guide_legend(override.aes = list(shape = 22, size = 4, fill = gomPal[c(1,2,3,4,5)], alpha = NA), order = 1, ncol = 1)) +
annotate(geom = "text", x = -0.1205, y = -0.017, label = paste0("PC 2 (", format(round(xestoPcaVar[2], 1), nsmall = 1), " %)"), angle = 90, size = 3.65) +
coord_cartesian(xlim = c(-0.1, 0.12), clip = "off") +
theme_bw()
xestoPcaPlot12S = xestoPcaPlot12SA +
pcaTheme +
theme(legend.position = c(0.1, 0.24))
xestoPcaPlot12S
xestoPcaPlot12LA = ggplot() +
geom_hline(yintercept = 0, color = "gray90", size = 0.5) +
geom_vline(xintercept = 0, color = "gray90", size = 0.5) +
geom_point(data = xestoPcangsd, aes(x = PC1, y = PC2, fill = cluster, shape = depth), color = "black", size = 3, alpha = 1, show.legend = TRUE) +
scale_shape_manual(values = c(21, 23), name = "Depth Zone") +
scale_fill_manual(values = xestoKColPal, name = "Lineage") +
labs(x = paste0("PC 1 (", format(round(xestoPcaVar[1], 1), nsmall = 1)," %)"), y = paste0("PC 2 (", format(round(xestoPcaVar[2], 1), nsmall = 1), " %)")) +
guides(shape = "none", fill = guide_legend(override.aes = list(shape = 22, size = 4, alpha = NA), order = 1, ncol = 1))+
coord_cartesian(xlim = c(-0.1, 0.12), clip = "off") +
theme_bw()
xestoPcaPlot12L = xestoPcaPlot12LA +
pcaTheme +
theme(legend.position = c(0.9, 0.2))
Put all plots together
xestoPcaPlots = ((xestoPcaPlot12S + theme(axis.title.y = element_text(margin = ggplot2::margin(r = -20, unit = "pt")))) | xestoPcaPlot12L) +
plot_annotation(tag_levels = 'A') &
theme(plot.tag = element_text(size = 18),
panel.background = element_rect(fill = "white"),
legend.spacing = unit(-5, "pt"),
legend.key = element_blank(),
legend.background = element_blank())
xestoPcaPlots
Prepare admixture outputs for plotting
xestoAdmix = xestoPcangsd %>%dplyr::select(-PC1, -PC2, -PC3, -PC4, -cluster, -depthm, -mean.x, -mean.y)
xestoAdmix$bank = factor(xestoAdmix$bank)
xestoAdmix = arrange(xestoAdmix, bank, depth, -Xm_Deep1, -Xm_Deep4, -Xm_Shal2)
xestoPopCounts = xestoAdmix %>% group_by(bank, depth) %>% summarise(n = n())
## `summarise()` has grouped output by 'bank'. You can override using the `.groups`
## argument.
xestoPopCounts
## # A tibble: 7 × 3
## # Groups: bank [5]
## bank depth n
## <fct> <fct> <int>
## 1 WFGB Shallow 29
## 2 WFGB Mesophotic 29
## 3 EFGB Shallow 29
## 4 EFGB Mesophotic 26
## 5 Bright Mesophotic 25
## 6 Geyer Mesophotic 30
## 7 McGrail Mesophotic 29
xestoPopCountList = reshape2::melt(lapply(xestoPopCounts$n,function(x){c(1:x)}))
xestoAdmix$ord = xestoPopCountList$value
xestoAdmixMelt = melt(xestoAdmix, id.vars=c("sample", "bank", "depth", "sitedepth", "ord"), variable.name="Ancestry", value.name="Fraction")
xestoAdmixMelt$Ancestry = factor(xestoAdmixMelt$Ancestry)
xestoAdmixMelt$Ancestry = factor(xestoAdmixMelt$Ancestry, levels = rev(levels(xestoAdmixMelt$Ancestry)))
xestoPopAnno = data.frame(x1 = c(0.5, 0.5, 0.5, 0.5, 0.5), x2 = c(30.5, 30.5, 30.5, 30.5, 30.5),
y1 = -0.1, y2 = -0.1, sample = NA, Ancestry = NA, depth = "Mesophotic",
ord = NA, Fraction = NA,
bank = c("WFGB", "EFGB", "Bright", "Geyer", "McGrail"))
xestoPopAnno$bank = factor(xestoPopAnno$bank)
xestoPopAnno$bank = factor(xestoPopAnno$bank, levels = levels(xestoPopAnno$bank)[c(5, 2, 1, 3, 4)])
Make admixture plots
xestoAdmixPlotA = ggplot(data = xestoAdmixMelt, aes(x = ord, y = Fraction, fill = Ancestry, order = sample)) +
geom_segment(data = xestoPopAnno, aes(x = x1, xend = x2, y = -.1, yend = -.1, color = bank), size = 7) +
geom_bar(stat = "identity", position = "fill", width = 1, colour = "grey25", size = 0.2) +
facet_grid(factor(depth) ~ bank, switch = "both", scales = "free_x") +
geom_text(data = (xestoAdmixMelt %>% filter(depth == "Mesophotic", bank %in% c("WFGB", "EFGB", "Bright", "Geyer", "McGrail"), sample %in% c("XGM101", "XGM135", "XGM078", "XGM177", "XGM058"), Ancestry == "Xm_Deep1")), x = 15.5, y = -.1, aes(label = bank), size = 4, color = "black") +
scale_fill_manual(values = xestoKColPal[c(1:6)]) +
scale_color_manual(values = gomPal) +
scale_x_discrete(expand = c(0.005, 0.005)) +
scale_y_continuous(expand = c(0.001, 0.001)) +
coord_cartesian(ylim = c(0.0, 1.0), clip = "off") +
theme_bw()
xestoAdmixPlot = xestoAdmixPlotA +
theme_bw()+
admixTheme
xestoAdmixPlot
leveneTest(lm(depthm ~ cluster, data = xestoPcangsd))
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 6 5.0923 0.00007161 ***
## 190
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
xestoDepthAov = welch_anova_test(depthm ~ cluster, data = subset(xestoPcangsd, subset = xestoPcangsd$cluster!="Admixed"))
xestoDF = xestoDepthAov$statistic[[1]]
xestoDepthPH = games_howell_test(depthm ~ cluster, data = xestoPcangsd, conf.level = 0.95) %>% as.data.frame()
xestoDepthComps = paste(xestoDepthPH$group1,"-",xestoDepthPH$group2, sep ="")
# xestoComps = c("D1-D2", "D1-D3", "D1-D4", "D1-S1", "D1-S2", "D1-A",)
xestoDepthPs = xestoDepthPH$p.adj
xmDepthMultComp = xestoDepthPs < .05
names(xmDepthMultComp) = xestoDepthComps
xmDepthLet = multcompLetters(xmDepthMultComp)
xmDepthLetters = xmDepthLet$Letters %>% as.data.frame()
xestoDepthLetters = data.frame(x = factor(rownames(xmDepthLetters)), y = 5, grp = xmDepthLetters$.)
xestoLineageViolinA = ggplot(data = xestoPcangsd, aes(x = cluster, y = depthm)) +
annotate(geom = "rect", xmin = -Inf, xmax = Inf, ymin = 30, ymax = Inf, fill = "black", alpha = 0.10, color = NA) +
geom_violin(aes(fill = cluster),adjust = 1, linewidth = 0, color = "black", alpha = 0.75, width = 0.7, trim = F, scale = "width") +
geom_beeswarm(aes(fill = cluster), shape = 21, size = 2, cex = 1.5, alpha = 0.5) +
geom_violin(adjust = 1, linewidth = 0.4, color = "black", alpha = 1, width = 0.7, trim = F, fill = NA, scale = "width") +
geom_boxplot(width = 0.2, color = "black", fill = "white", outlier.colour = NA, linewidth = 0.6, alpha = 0.5) +
geom_segment(data = ticks[c(1:13),], aes(x = x, xend = xend, y = y)) +
annotate(geom = "text", x = 2.75, y =65, label = bquote(italic("F")~" = "~.(xestoDF)*"; "~italic("p")~" < 0.001"), size = 3) +
annotate(geom = "text", x = xestoDepthLetters$x, y = xestoDepthLetters$y, label = xestoDepthLetters$grp) +
scale_fill_discrete(type = xestoKColPal, name = "Lineage") +
scale_color_discrete(type = xestoKColPal, name = "Lineage") +
xlab("Lineage") +
ylab("Depth (m)") +
scale_y_reverse(breaks = seq(5, 90, 5)) +
coord_cartesian(xlim = c(0.75, 7.25), clip = "off") +
theme_bw()
xestoLineageViolin = xestoLineageViolinA + violinTheme + theme(axis.text.x = element_text(color = "black", size = 10, angle = 45, hjust = 0.9))
xestoLineageViolin
Measuring with global weighted FST calculated from SFS
First prepare and sort FST for plotting
xm.pops = c("Xm_Deep1", "Xm_Deep2", "Xm_Deep3", "Xm_Deep4", "Xm_Shal1", "Xm_Shal2")
# reads in fst
xmFstMa1 <- read.delim("../data/xesto/xestoKFst.out") %>% dplyr::select(-fst) %>% df_to_pw_mat(., "pop1", "pop2", "weightedFst")
colnames(xmFstMa1) = xm.pops
rownames(xmFstMa1) = xm.pops
xmFstMa = xmFstMa1
upperTriangle(xmFstMa, byrow = TRUE) <- lowerTriangle(xmFstMa)
xmFstMa <- xmFstMa[,xm.pops] %>%
.[xm.pops,]
xmFstMa[lower.tri(xmFstMa)] <- NA
xmFstMa <- as.data.frame(xmFstMa)
# rearrange and reformat matrix
xmFstMa$Pop = factor(row.names(xmFstMa), levels = unique(xm.pops))
# melt matrix data (turn from matrix into long dataframe)
xmFst = melt(xmFstMa, id.vars = "Pop", value.name = "Fst", variable.name = "Pop2", na.rm = FALSE)
xmFst$Fst = round(xmFst$Fst, 3)
xmFst$site = xmFst$Pop
xmFst$site = factor(xmFst$site)
xmFst$site2 = xmFst$Pop2
Construct a heatmap of FST values
xmFstHeatmapA = ggplot(data = xmFst %>% filter(Fst != 0), aes(Pop, Pop2, fill = as.numeric(as.character(Fst)))) +
geom_tile(color = "white") +
geom_segment(data = xmFst, aes(x = 0.475, xend = 0.25, y = Pop2, yend = Pop2, color = site2), size = 11.5) + #colored boxes for Y-axis labels
geom_segment(data = xmFst, aes(x = Pop, xend = Pop, y = 0, yend = 0.475, color = site), size = 22.5) + #colored boxes for X-axis labels
scale_color_manual(values = xestoKColPal, guide = NULL) +
scale_fill_gradient(low = "white", high = "#EA526F", limit = c(0, 0.165), space = "Lab", name = expression(paste(italic("F")[ST])), na.value = NA, guide = "colourbar") +
geom_text(data = xmFst %>% filter(Fst != 0), aes(Pop, Pop2, label = Fst), color = "black", size = 3.5, fontface = "bold") +
guides(fill = guide_colorbar(barwidth = 7.5, barheight = 0.75, title.position = "top", title.hjust = 0.5, direction = "horizontal", ticks.colour = "black", frame.colour = "black")) +
scale_y_discrete(position = "left", limits = rev(levels(xmFst$Pop2))) +
scale_x_discrete(limits = levels(xmFst$Pop2)[c(1:6)]) +
coord_cartesian(xlim = c(1, 6), ylim = c(1, 6), clip = "off") +
theme_minimal()
xmFstHeatmap = xmFstHeatmapA + theme(
axis.text.x = element_text(vjust = 13, size = 8, hjust = 0.5, color = "black"),
axis.text.y = element_text(size = 5, color = "black", angle = 90, hjust = 0.5, vjust = -5),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
panel.grid.major = element_blank(),
panel.border = element_blank(),
axis.ticks = element_blank(),
legend.title = element_text(size = 8, color = "black"),
legend.text = element_text(size = 8, color = "black"),
legend.position = c(0.6, 0.9),
plot.background = element_blank(),
panel.background = element_blank(),
)
xmFstHeatmap
xestoData = read.csv("../data/nwgomMetaData.csv") %>% dplyr::filter(species == "X. muta") %>% dplyr::filter(!sample %in% c("XGM193.2", "XGM193.3", "XGM203.1", "XGM203.3", "XGM158.1", "XGM158.2", "XGM149", "XGM118", "XGM132", "XGM117", "XGM099", "XGM110", "XGM179.2", "XGM179.3", "XGM072.1", "XGM072.3", "XGM034.1", "XGM034.3", "XGM013", "XGM067", "XGM129"))
xestoHetData = read.delim("../data/xesto/xestoHet.out", header = FALSE, sep = " ") %>% mutate(sample = xestoData$sample) %>% rename("Het" = "V2") %>% left_join(xestoPcangsd) %>% select(sample, Het, cluster)
## Joining with `by = join_by(sample)`
xestoHet = xestoData %>% left_join(xestoHetData)
## Joining with `by = join_by(sample)`
xestoHetAOV = welch_anova_test(xestoHet, Het ~ cluster)
xestoHetAOV$p
## [1] 0.000000374
xestoHetPH = games_howell_test(xestoHet, Het ~ cluster)
xestoHetPH
## # A tibble: 21 × 8
## .y. group1 group2 estimate conf.low conf.high p.adj p.adj.signif
## * <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 Het Xm_Deep1 Xm_Deep2 -0.0000100 -0.0000877 0.0000677 1 ns
## 2 Het Xm_Deep1 Xm_Deep3 0.000108 0.0000432 0.000172 0.0000885 ****
## 3 Het Xm_Deep1 Xm_Deep4 0.0000338 -0.0000671 0.000135 0.926 ns
## 4 Het Xm_Deep1 Xm_Shal1 0.0000582 -0.0000151 0.000131 0.198 ns
## 5 Het Xm_Deep1 Xm_Shal2 0.0000477 -0.00000736 0.000103 0.129 ns
## 6 Het Xm_Deep1 Admixed 0.000128 0.0000615 0.000195 0.0000035 ****
## 7 Het Xm_Deep2 Xm_Deep3 0.000118 0.0000360 0.000199 0.001 ***
## 8 Het Xm_Deep2 Xm_Deep4 0.0000438 -0.0000666 0.000154 0.864 ns
## 9 Het Xm_Deep2 Xm_Shal1 0.0000682 -0.0000198 0.000156 0.219 ns
## 10 Het Xm_Deep2 Xm_Shal2 0.0000577 -0.0000177 0.000133 0.221 ns
## # ℹ 11 more rows
xestoHetPH$p.adj
## [1] 1.0000000 0.0000885 0.9260000 0.1980000 0.1290000 0.0000035 0.0010000 0.8640000
## [9] 0.2190000 0.2210000 0.0001520 0.2920000 0.4520000 0.0580000 0.9770000 0.9900000
## [17] 0.9990000 0.1000000 0.9990000 0.1200000 0.0040000
xestoComps = paste(xestoHetPH$group1,"-",xestoHetPH$group2, sep ="")
# xestoComps = c("D1-D2", "D1-D3", "D1-D4", "D1-S1", "D1-S2", "D1-A",)
xestoPs = xestoHetPH$p.adj
xmMultComp = xestoPs < .05
names(xmMultComp) = xestoComps
xmLet = multcompLetters(xmMultComp)
xmLetters = xmLet$Letters %>% as.data.frame()
xestoHetLetters = data.frame(x = factor(rownames(xmLetters)), y = 0.001, grp = xmLetters$.)
xestoHetPlot = ggplot(data = xestoHet, aes(x = cluster, y = Het)) +
geom_violin(aes(fill = cluster, group = cluster), adjust = 1, linewidth = 0.4, color = "black", alpha = 1, trim = FALSE, scale = "area", width = 1) +
geom_beeswarm(aes(fill = cluster), shape = 21, cex = 0.5) +
geom_boxplot(aes(fill = cluster, group = cluster), width = 0.2, color = "black", fill = "white", outlier.colour = NA, linewidth = 0.4, alpha = 0.5) + xlab("Lineage") +
geom_text(data = xestoHetLetters, aes(x = x, y = y, label = grp), size = 4) +
annotate(geom = "text", x = 2.2, y = 0, label = bquote(italic("F")~" = "~.(xestoHetAOV$statistic)*"; "~italic("p")~" < 0.001"), size = 3) +
scale_fill_discrete(type = xestoKColPal, name = "Lineage") +
xlab("Lineage") +
ylab("Heterozygosity") +
scale_y_continuous(breaks = seq(0, 0.001, 0.0002)) +
coord_cartesian(expand = TRUE, xlim = c(0.85, 7.15), ylim = c(0, 0.001)) +
theme_bw() +
violinTheme +
theme(axis.text.x = element_text(color = "black", size = 10, angle = 45, hjust = 0.9), axis.ticks.y = element_line(color = "black"))
xestoHetPlot
xestoNpgList = list(read_tsv("../data/xesto/Xm_Deep1.thetas.idx.pestPG") %>% mutate(lineage = "Xm_Deep1"),
read_tsv("../data/xesto/Xm_Deep2.thetas.idx.pestPG") %>% mutate(lineage = "Xm_Deep2"),
read_tsv("../data/xesto/Xm_Deep3.thetas.idx.pestPG") %>% mutate(lineage = "Xm_Deep3"),
read_tsv("../data/xesto/Xm_Deep4.thetas.idx.pestPG") %>% mutate(lineage = "Xm_Deep4"),
read_tsv("../data/xesto/Xm_Shal1.thetas.idx.pestPG") %>% mutate(lineage = "Xm_Shal1"),
read_tsv("../data/xesto/Xm_Shal2.thetas.idx.pestPG") %>% mutate(lineage = "Xm_Shal2")
)
## Rows: 7605 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (2): #(indexStart,indexStop)(firstPos_withData,lastPos_withData)(WinStart,WinSt...
## dbl (12): WinCenter, tW, tP, tF, tH, tL, Tajima, fuf, fud, fayh, zeng, nSites
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 7605 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (2): #(indexStart,indexStop)(firstPos_withData,lastPos_withData)(WinStart,WinSt...
## dbl (12): WinCenter, tW, tP, tF, tH, tL, Tajima, fuf, fud, fayh, zeng, nSites
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 7605 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (2): #(indexStart,indexStop)(firstPos_withData,lastPos_withData)(WinStart,WinSt...
## dbl (12): WinCenter, tW, tP, tF, tH, tL, Tajima, fuf, fud, fayh, zeng, nSites
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 7605 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (2): #(indexStart,indexStop)(firstPos_withData,lastPos_withData)(WinStart,WinSt...
## dbl (12): WinCenter, tW, tP, tF, tH, tL, Tajima, fuf, fud, fayh, zeng, nSites
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 7605 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (2): #(indexStart,indexStop)(firstPos_withData,lastPos_withData)(WinStart,WinSt...
## dbl (12): WinCenter, tW, tP, tF, tH, tL, Tajima, fuf, fud, fayh, zeng, nSites
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 7605 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (2): #(indexStart,indexStop)(firstPos_withData,lastPos_withData)(WinStart,WinSt...
## dbl (12): WinCenter, tW, tP, tF, tH, tL, Tajima, fuf, fud, fayh, zeng, nSites
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
xestoPiAll = purrr::reduce(xestoNpgList, rbind) %>%
dplyr::group_by(lineage) %>%
filter(tP != 0) %>%
mutate(tPps = tP/nSites) %>%
dplyr::summarize(tPps = mean(tPps))
xestoPiAll$Ne = (xestoPiAll$tPps)/(4*2e-8)
xestoPiAll
## # A tibble: 6 × 3
## lineage tPps Ne
## <chr> <dbl> <dbl>
## 1 Xm_Deep1 0.00451 56318.
## 2 Xm_Deep2 0.00427 53318.
## 3 Xm_Deep3 0.00451 56349.
## 4 Xm_Deep4 0.00404 50545.
## 5 Xm_Shal1 0.00426 53241.
## 6 Xm_Shal2 0.00456 56975.
xestoPiAll$lineage = factor(xestoPiAll$lineage)
xestoNuclDivPlot = ggplot(xestoPiAll, aes(x = lineage, y = tPps, fill = lineage, group = lineage)) +
geom_bar(position = position_dodge2(preserve = "single"), stat = "identity", color = "black") +
scale_fill_manual(values = xestoKColPal) +
geom_text(position = position_dodge2(preserve = "single", width = 0.9), aes(y = tPps-.0015, label= scales::comma(round(Ne,0)), hjust = 0, fontface = "bold"), angle = 90, color = "black", ) +
labs(x = "Lineage", y = bquote("Nucleotide diversity ("*pi*")")) +
theme_bw() +
violinTheme +
theme(axis.text.x = element_text(color = "black", size = 10, angle = 45, hjust = 0.9))#,
#axis.text.x = ggtext::element_markdown())
xestoNuclDivPlot
xestoPlot1 = xestoStructure + inset_element(xestoImg, -0.1, 0.7, 0.3, 1, align_to = "full", ignore_tag = TRUE)
xestoPlot2 = xestoPcaPlots + plot_layout(guides = "collect")
xestoPlot3 = (xestoLineageViolin + theme(axis.text.y = element_text(margin = ggplot2::margin(r = -15, unit = "pt")))) + xestoHetPlot + xestoNuclDivPlot
xestoPlot4 = xestoAdmixPlot + xmFstHeatmap
xestoPlots = xestoPlot1 + xestoPlot2 + xestoPlot3 + xestoPlot4 +
plot_layout(heights = c(2,1,1,1)) +
plot_annotation(tag_levels = 'A') &
theme(plot.tag = element_text(size = 18),
legend.spacing = unit(-5, "pt"),
legend.key = element_blank(),
legend.background = element_blank())
ggsave("../figures/figure5.png", plot = xestoPlots, height = 14, width = 11, units = "in", dpi = 300)
ggsave("../figures/figure5.svg", plot = xestoPlots, height = 14, width = 11, units = "in", dpi = 300)
Checking deviance among model runs from BayesAss we ran on HPC
# fileList = substr(list.files("../data/snps/bayesAss/", "BA3trace.*.txt$"),1,10)
fileList = substr(list.files("../data/mcav/BA3/", "BA3trace.*.txt$"),1,15)
bayesian_deviance <- function(trace, burnin = 0, sampling.interval = 0){
if(burnin == 0) stop('No burnin specified')
if(sampling.interval == 0) stop('No sampling interval specified')
range <- (trace$State > burnin & trace$State %% sampling.interval == 0)
D <- -2*mean(trace$LogProb[range])
return(D)
}
for(i in 1:length(fileList)){
assign(fileList[i], read.delim(paste("../data/mcav/BA3/", fileList[i], ".txt", sep = ""))) %>% dplyr::select(-last_col())
print(paste(fileList[i], bayesian_deviance(get(fileList[i]), burnin = 10000000, sampling.interval = 1000)))
}
## [1] "mcavBA3trace.01 669318.0188"
## [1] "mcavBA3trace.02 668830.0212"
## [1] "mcavBA3trace.03 669141.0585"
## [1] "mcavBA3trace.04 668427.0425"
## [1] "mcavBA3trace.05 668596.3534"
## [1] "mcavBA3trace.06 668835.2774"
## [1] "mcavBA3trace.07 668961.4509"
## [1] "mcavBA3trace.08 668469.9586"
## [1] "mcavBA3trace.09 668742.6304"
## [1] "mcavBA3trace.10 668837.7336"
# [1] "mcavBA3trace.01 669318.0188"
# [1] "mcavBA3trace.02 668830.0212"
# [1] "mcavBA3trace.03 669141.0585"
# [1] "mcavBA3trace.04 668427.0425"
# [1] "mcavBA3trace.05 668596.3534"
# [1] "mcavBA3trace.06 668835.2774"
# [1] "mcavBA3trace.07 668961.4509"
# [1] "mcavBA3trace.08 668469.9586"
# [1] "mcavBA3trace.09 668742.6304"
# [1] "mcavBA3trace.10 668837.7336"
All traces have similar deviance (this is good!). Using the trace with the lowest deviance (BA3trace.02.txt, in this case)
mcavBayesAss = read.delim("../data/mcav/BA3/mcavBA3trace.04.txt") %>% filter(State > 10000000) %>% dplyr::select(-State, -LogProb, -X)
mcavBaMean = mcavBayesAss %>% summarise(across(everything(), list(mean))) %>% t() %>% as_tibble() %>% rename(., mean=V1) %>% mutate(pops = colnames(mcavBayesAss))
mcavBaSumm = mcavBayesAss %>% summarise(across(everything(), list(median))) %>% t() %>% as.data.frame() %>% rename(., median=V1) %>% mutate(pops = mcavBaMean$pops, mean = round(mcavBaMean$mean, 3)) %>% relocate(median, .after = mean)
mcavBaSumm$median = round(mcavBaSumm$median, 3)
mcavBaHpd =as.data.frame(t(sapply(mcavBayesAss, emp.hpd)))
colnames(mcavBaHpd) = c("hpdLow", "hpdHigh")
mcavBaHpd$pops = rownames(mcavBaHpd)
mcavESS = as.data.frame(sapply(mcavBayesAss, ESS))
colnames(mcavESS) = "ESS"
mcavBaSumm = mcavBaSumm %>% left_join(mcavBaHpd)
## Joining with `by = join_by(pops)`
mcavBaSumm$hpdLow = round(mcavBaSumm$hpdLow, 3)
mcavBaSumm$hpdHigh = round(mcavBaSumm$hpdHigh, 3)
mcavBaSumm$ESS = mcavESS$ESS
### FROM BAYESASS: ###
## Population Index -> Population Label:
## 0->Geyer_Mesophotic 1->EFGB_Mesophotic
## 2->EFGB_Shallow 3->WFGB_Mesophotic
## 4->WFGB_Shallow 5->Bright_Mesophotic
## 6->McGrail_Mesophotic
popi = rep(c("Geyer\nMesophotic", "EFGB\nMesophotic", "EFGB\nShallow", "WFGB\nMesophotic", "WFGB\nShallow", "Bright\nMesophotic", "McGrail\nMesophotic"), each = 7)
popj = rep(c("Geyer\nMesophotic", "EFGB\nMesophotic", "EFGB\nShallow", "WFGB\nMesophotic", "WFGB\nShallow", "Bright\nMesophotic", "McGrail\nMesophotic"), times = 7)
mcavBaSumm = mcavBaSumm %>% mutate(pop.i = popi, pop.j = popj) %>% relocate(c(pop.i, pop.j), .after = pops) %>% dplyr::select(-pops)
mcavBaSumm$pop.i = factor(mcavBaSumm$pop.i)
mcavBaSumm$pop.i = factor(mcavBaSumm$pop.i, levels = levels(mcavBaSumm$pop.i)[c(7, 3, 6, 2, 1, 4, 5)])
mcavBaSumm$pop.j = factor(mcavBaSumm$pop.j)
mcavBaSumm$pop.j = factor(mcavBaSumm$pop.j, levels = levels(mcavBaSumm$pop.j)[c(7, 3, 6, 2, 1, 4, 5)])
mcavBaSumm$site.i = word(mcavBaSumm$pop.i, 1, sep = "\n")
mcavBaSumm$site.i = factor(mcavBaSumm$site.i)
mcavBaSumm$site.i = factor(mcavBaSumm$site.i, levels = levels(mcavBaSumm$site.i)[c(5, 2, 1, 3, 4)])
mcavBaSumm$site.j = word(mcavBaSumm$pop.j, 1, sep = "\n")
mcavBaSumm$site.j = factor(mcavBaSumm$site.j)
mcavBaSumm$site.j = factor(mcavBaSumm$site.j, levels = levels(mcavBaSumm$site.j)[c(5, 2, 1, 3, 4)])
mcavBaSumm$depth.i = word(mcavBaSumm$pop.i, 2, sep = "\n")
mcavBaSumm$depth.i = factor(mcavBaSumm$depth.i)
mcavBaSumm$depth.i = factor(mcavBaSumm$depth.i, levels = levels(mcavBaSumm$depth.i)[c(2, 1)])
mcavBaSumm$depth.j = word(mcavBaSumm$pop.j, 2, sep = "\n")
mcavBaSumm$depth.j = factor(mcavBaSumm$depth.j)
mcavBaSumm$depth.j = factor(mcavBaSumm$depth.j, levels = levels(mcavBaSumm$depth.j)[c(2, 1)])
#All sites (excluding self retention)
mcavBaMeans = mcavBaSumm %>% filter(pop.i != pop.j) %>% summarise(mean = mean(mean), sd = sd(.$mean), se = sd(.$mean)/sqrt(nrow(.))) %>% mutate(dataset = "Global")
#mesophotic sources
mcavBaMeans = mcavBaSumm %>% filter(pop.i != pop.j, depth.j == "Mesophotic") %>% summarise(mean = mean(mean), sd = sd(.$mean), se = sd(.$mean)/sqrt(nrow(.))) %>% mutate(dataset = "Mesophotic Source") %>% bind_rows(mcavBaMeans, .)
#shallow sources
mcavBaMeans = mcavBaSumm %>% filter(pop.i != pop.j, depth.j == "Shallow") %>% summarise(mean = mean(mean), sd = sd(.$mean), se = sd(.$mean)/sqrt(nrow(.))) %>% mutate(dataset = "Shallow Source") %>% bind_rows(mcavBaMeans, .)
#mesophotic sinks
mcavBaMeans = mcavBaSumm %>% filter(pop.i != pop.j, depth.i == "Mesophotic") %>% summarise(mean = mean(mean), sd = sd(.$mean), se = sd(.$mean)/sqrt(nrow(.))) %>% mutate(dataset = "Mesophotic Sink") %>% bind_rows(mcavBaMeans, .)
#shallow sinks
mcavBaMeans = mcavBaSumm %>% filter(pop.i != pop.j, depth.i == "Shallow") %>% summarise(mean = mean(mean), sd = sd(.$mean), se = sd(.$mean)/sqrt(nrow(.))) %>% mutate(dataset = "Shallow Sink") %>% bind_rows(mcavBaMeans, .)
#mesophotic -> shallow
mcavBaMeans = mcavBaSumm %>% filter(pop.i != pop.j, depth.j == "Mesophotic", depth.i == "Shallow") %>% summarise(mean = mean(mean), sd = sd(.$mean), se = sd(.$mean)/sqrt(nrow(.))) %>% mutate(dataset = "Mesophotic -> Shallow") %>% bind_rows(mcavBaMeans, .)
#mesophotic -> mesophotic
mcavBaMeans = mcavBaSumm %>% filter(pop.i != pop.j, depth.j == "Mesophotic", depth.i == "Mesophotic") %>% summarise(mean = mean(mean), sd = sd(.$mean), se = sd(.$mean)/sqrt(nrow(.))) %>% mutate(dataset = "Mesophotic -> Mesophotic") %>% bind_rows(mcavBaMeans, .)
#shallow -> mesophotic
mcavBaMeans = mcavBaSumm %>% filter(pop.i != pop.j, depth.j == "Shallow", depth.i == "Mesophotic") %>% summarise(mean = mean(.$mean), sd = sd(.$mean), se = sd(.$mean)/sqrt(nrow(.))) %>% mutate(dataset = "Shallow -> Mesophotic") %>% bind_rows(mcavBaMeans, .)
#shallow -> shallow
mcavBaMeans = mcavBaSumm %>% filter(pop.i != pop.j, depth.j == "Shallow", depth.i == "Shallow") %>% summarise(mean = round(mean(.$mean), 5), sd = round(sd(.$mean), 5), se = round(sd(.$mean)/sqrt(nrow(.)), 3)) %>% mutate(dataset = paste("Shallow -> Shallow")) %>% bind_rows(mcavBaMeans, .) %>% relocate(dataset, .before = mean) %>% as.data.frame()
mcavBaMeans[,c(2:4)] = mcavBaMeans[,c(2:4)] %>% round(4)
mcavBaMeans
## dataset mean sd se
## 1 Global 0.0437 0.0610 0.0094
## 2 Mesophotic Source 0.0203 0.0113 0.0021
## 3 Shallow Source 0.1021 0.0913 0.0264
## 4 Mesophotic Sink 0.0485 0.0608 0.0111
## 5 Shallow Sink 0.0315 0.0626 0.0181
## 6 Mesophotic -> Shallow 0.0138 0.0075 0.0024
## 7 Mesophotic -> Mesophotic 0.0235 0.0116 0.0026
## 8 Shallow -> Mesophotic 0.0985 0.0864 0.0273
## 9 Shallow -> Shallow 0.1200 0.1542 0.1090
mcavBaMeansPub = mcavBaMeans %>%
flextable() %>%
flextable::compose(part = "header", j = "dataset", value = as_paragraph("Dataset")) %>%
flextable::compose(part = "header", j = "mean", value = as_paragraph(as_i("m"))) %>%
flextable::compose(part = "header", j = "sd", value = as_paragraph("SD")) %>%
flextable::compose(part = "header", j = "se", value = as_paragraph("SEM")) %>%
flextable::font(fontname = "Times New Roman", part = "all") %>%
flextable::fontsize(size = 10, part = "all") %>%
flextable::bold(part = "header") %>%
flextable::align(align = "left", part = "all") %>%
flextable::autofit()
table = read_docx()
table = body_add_flextable(table, value = mcavBaMeansPub)
print(table, target = "../tables/table1.docx")
mcavBaMeansPub
Dataset | m | SD | SEM |
|---|---|---|---|
Global | 0.0437 | 0.0610 | 0.0094 |
Mesophotic Source | 0.0203 | 0.0113 | 0.0021 |
Shallow Source | 0.1021 | 0.0913 | 0.0264 |
Mesophotic Sink | 0.0485 | 0.0608 | 0.0111 |
Shallow Sink | 0.0315 | 0.0626 | 0.0181 |
Mesophotic -> Shallow | 0.0138 | 0.0075 | 0.0024 |
Mesophotic -> Mesophotic | 0.0235 | 0.0116 | 0.0026 |
Shallow -> Mesophotic | 0.0985 | 0.0864 | 0.0273 |
Shallow -> Shallow | 0.1200 | 0.1542 | 0.1090 |
mcavBaSumm$mean = sprintf('%.3f', mcavBaSumm$mean)
mcavBaSumm$mean2 = mcavBaSumm$mean
mcavBaSumm$hpdLow = sprintf('%.3f', mcavBaSumm$hpdLow)
mcavBaSumm$hpdHigh = sprintf('%.3f', mcavBaSumm$hpdHigh)
mcavBaLabs = tibble(pop.i = unique(mcavBaSumm$pop.i), pop.j = unique(mcavBaSumm$pop.j))
mcavMigrateA = ggplot(data = mcavBaSumm, aes(pop.i, pop.j))+
geom_tile(data = subset(mcavBaSumm, subset = mcavBaSumm$mean2>0.65), fill = "gray35", color = "white") +
geom_segment(data = mcavBaSumm, aes(x = 0.4755, xend = -0.42, y = pop.j, yend = pop.j, color = pop.j), size = 14) +
geom_segment(data = mcavBaSumm, aes(x = pop.i, xend = pop.i, y = 0.45, yend = -0.425, color = pop.i), size = 32) +
scale_color_manual(values = gomPal[c(1:2, 1:5)], guide = NULL) +
guides(fill = guide_colorbar(ticks.colour = "black", barwidth = 1, barheight = 10, frame.colour = "black")) +
# new_scale("fill") +
geom_tile(data = subset(mcavBaSumm, subset = mcavBaSumm$mean<0.65), aes(fill = as.numeric(as.character(mean))), color = "white") +
scale_fill_gradientn(colours = paletteer_c("viridis::mako", n = 10, direction = -1)[c(1:7)], limit = c(0,0.23), space = "Lab", name = expression(paste(italic("m"))), na.value = "transparent", guide = "colourbar", values = c(0, 0.05, 0.1, 0.15, 0.2,0.5,0.75,1)) +
# scale_fill_gradientn(colours = paletteer_d("khroma::smoothrainbow"), limit = c(0,0.27), space = "Lab", name = expression(paste(italic("m"))), na.value = "transparent", guide = "colourbar", values = c(0, 0.05, 0.1, 0.15, 0.2,0.5,0.75,1)) +
geom_text(data = mcavBaSumm, aes(x = pop.i, y = pop.j, label = paste(mean, "\n", sep = "")), color = ifelse(mcavBaSumm$mean > 0.6, "white", "gray5"), fontface = ifelse(as.numeric(mcavBaSumm$hpdLow)>0, "bold", "plain"), size = ifelse(as.numeric(mcavBaSumm$hpdLow)>0, 4.75, 4)) +
geom_text(data = mcavBaSumm, aes(x = pop.i, y = pop.j, label = paste("\n(",hpdLow,"–",hpdHigh, ")", sep = "")), color = ifelse(mcavBaSumm$mean > 0.6, "white", "gray5"), size = 3.25) +
geom_text(data = (mcavBaLabs %>% filter(pop.j %in% c("Tortugas Bank\nMesophotic", "Tortugas Bank\nShallow", "Riley's Hump\nMesophotic", "Riley's Hump\nShallow"))), x = -.02, aes(y = pop.j, label = pop.j), size = 3.75, color = "#FFFFFF", family = "sans") +
geom_text(data = (mcavBaLabs %>% filter(!pop.j %in% c("Tortugas Bank\nMesophotic", "Tortugas Bank\nShallow", "Riley's Hump\nMesophotic", "Riley's Hump\nShallow"))), x = -.02, aes(y = pop.j, label = pop.j), size = 3.75, color = "#000000", family = "sans") +
geom_text(data = (mcavBaLabs %>% filter(pop.i %in% c("Tortugas Bank\nMesophotic", "Tortugas Bank\nShallow", "Riley's Hump\nMesophotic", "Riley's Hump\nShallow"))), y = -.01, aes(x = pop.i, label = pop.i), size = 3.75, color = "#FFFFFF", family = "sans") +
geom_text(data = (mcavBaLabs %>% filter(!pop.i %in% c("Tortugas Bank\nMesophotic", "Tortugas Bank\nShallow", "Riley's Hump\nMesophotic", "Riley's Hump\nShallow"))), y = -.01, aes(x = pop.i, label = pop.i), size = 3.75, color = "#000000", family = "sans") +
labs(x = "Sink", y = "Source") +
scale_y_discrete(limits = rev(levels(mcavBaSumm$pop.i))[c(1:8)], position = "left") +
coord_cartesian(xlim = c(1, 8), ylim = c(1, 8), clip = "off") +
theme_minimal()
mcavMigrate = mcavMigrateA + theme(
axis.text.x = element_text(vjust = 1, size = 12, hjust = 0.5, color = NA),
axis.text.y = element_text(size = 10, color = NA),
axis.title.x = element_text(size = 14, hjust = 0.425),
axis.title.y = element_text(size = 14, hjust = 0.425),
panel.grid.major = element_blank(),
axis.ticks = element_blank(),
# legend.position = c(1.055, 0.5),
legend.direction = "vertical",
legend.title = element_text(size = 12, face = "bold")
)
mcavMigrate
Checking deviance among model runs from BayesAss we ran on HPC
# fileList = substr(list.files("../data/snps/bayesAss/", "BA3trace.*.txt$"),1,10)
fileList = substr(list.files("../data/sint/BA3/", "BA3trace.*.txt$"),1,15)
bayesian_deviance <- function(trace, burnin = 0, sampling.interval = 0){
if(burnin == 0) stop('No burnin specified')
if(sampling.interval == 0) stop('No sampling interval specified')
range <- (trace$State > burnin & trace$State %% sampling.interval == 0)
D <- -2*mean(trace$LogProb[range])
return(D)
}
for(i in 1:length(fileList)){
assign(fileList[i], read.delim(paste("../data/sint/BA3/", fileList[i], ".txt", sep = ""))) %>% dplyr::select(-last_col())
print(paste(fileList[i], bayesian_deviance(get(fileList[i]), burnin = 5000000, sampling.interval = 1000)))
}
## [1] "sintBA3trace.01 4095847.462"
## [1] "sintBA3trace.02 4094678.242"
## [1] "sintBA3trace.03 4095399.988"
## [1] "sintBA3trace.04 4095630.518"
## [1] "sintBA3trace.05 4095470.046"
## [1] "sintBA3trace.06 4094276.6"
## [1] "sintBA3trace.07 4095140.89"
## [1] "sintBA3trace.08 4095926.754"
## [1] "sintBA3trace.09 4095283.68"
## [1] "sintBA3trace.10 4095434.772"
# [1] "sintBA3trace.01 4095847.462"
# [1] "sintBA3trace.02 4094678.242"
# [1] "sintBA3trace.03 4095399.988"
# [1] "sintBA3trace.04 4095630.518"
# [1] "sintBA3trace.05 4095470.046"
# [1] "sintBA3trace.06 4094276.6"
# [1] "sintBA3trace.07 4095140.89"
# [1] "sintBA3trace.08 4095926.754"
# [1] "sintBA3trace.09 4095283.68"
# [1] "sintBA3trace.10 4095434.772"
All traces have similar deviance (this is good!). Using the trace with the lowest deviance (BA3trace.02.txt, in this case)
sintBayesAss = read.delim("../data/sint/BA3/sintBA3trace.06.txt") %>% filter(State > 5000000) %>% dplyr::select(-State, -LogProb, -X)
sintBaMean = sintBayesAss %>% summarise(across(everything(), list(mean))) %>% t() %>% as_tibble() %>% rename(., mean=V1) %>% mutate(pops = colnames(sintBayesAss))
sintBaSumm = sintBayesAss %>% summarise(across(everything(), list(median))) %>% t() %>% as.data.frame() %>% rename(., median=V1) %>% mutate(pops = sintBaMean$pops, mean = round(sintBaMean$mean, 3)) %>% relocate(median, .after = mean)
sintBaSumm$median = round(sintBaSumm$median, 3)
sintBaHpd =as.data.frame(t(sapply(sintBayesAss, emp.hpd)))
colnames(sintBaHpd) = c("hpdLow", "hpdHigh")
sintBaHpd$pops = rownames(sintBaHpd)
sintESS = as.data.frame(sapply(sintBayesAss, ESS))
colnames(sintESS) = "ESS"
sintBaSumm = sintBaSumm %>% left_join(sintBaHpd)
## Joining with `by = join_by(pops)`
sintBaSumm$hpdLow = round(sintBaSumm$hpdLow, 3)
sintBaSumm$hpdHigh = round(sintBaSumm$hpdHigh, 3)
sintBaSumm$ESS = sintESS$ESS
### FROM BAYESASS: ###
## Population Index -> Population Label:
## 0->Geyer_Mesophotic 1->EFGB_Mesophotic
## 2->EFGB_Shallow 3->WFGB_Mesophotic
## 4->WFGB_Shallow 5->Bright_Mesophotic
## 6->McGrail_Mesophotic
popi = rep(c("Geyer\nMesophotic", "EFGB\nMesophotic", "EFGB\nShallow", "WFGB\nMesophotic", "WFGB\nShallow", "Bright\nMesophotic", "McGrail\nMesophotic"), each = 7)
popj = rep(c("Geyer\nMesophotic", "EFGB\nMesophotic", "EFGB\nShallow", "WFGB\nMesophotic", "WFGB\nShallow", "Bright\nMesophotic", "McGrail\nMesophotic"), times = 7)
sintBaSumm = sintBaSumm %>% mutate(pop.i = popi, pop.j = popj) %>% relocate(c(pop.i, pop.j), .after = pops) %>% dplyr::select(-pops)
sintBaSumm$pop.i = factor(sintBaSumm$pop.i)
sintBaSumm$pop.i = factor(sintBaSumm$pop.i, levels = levels(sintBaSumm$pop.i)[c(7, 3, 6, 2, 1, 4, 5)])
sintBaSumm$pop.j = factor(sintBaSumm$pop.j)
sintBaSumm$pop.j = factor(sintBaSumm$pop.j, levels = levels(sintBaSumm$pop.j)[c(7, 3, 6, 2, 1, 4, 5)])
sintBaSumm$site.i = word(sintBaSumm$pop.i, 1, sep = "\n")
sintBaSumm$site.i = factor(sintBaSumm$site.i)
sintBaSumm$site.i = factor(sintBaSumm$site.i, levels = levels(sintBaSumm$site.i)[c(5, 2, 1, 3, 4)])
sintBaSumm$site.j = word(sintBaSumm$pop.j, 1, sep = "\n")
sintBaSumm$site.j = factor(sintBaSumm$site.j)
sintBaSumm$site.j = factor(sintBaSumm$site.j, levels = levels(sintBaSumm$site.j)[c(5, 2, 1, 3, 4)])
sintBaSumm$depth.i = word(sintBaSumm$pop.i, 2, sep = "\n")
sintBaSumm$depth.i = factor(sintBaSumm$depth.i)
sintBaSumm$depth.i = factor(sintBaSumm$depth.i, levels = levels(sintBaSumm$depth.i)[c(2, 1)])
sintBaSumm$depth.j = word(sintBaSumm$pop.j, 2, sep = "\n")
sintBaSumm$depth.j = factor(sintBaSumm$depth.j)
sintBaSumm$depth.j = factor(sintBaSumm$depth.j, levels = levels(sintBaSumm$depth.j)[c(2, 1)])
#All sites (excluding self retention)
sintBaMeans = sintBaSumm %>% filter(pop.i != pop.j) %>% summarise(mean = mean(mean), sd = sd(.$mean), se = sd(.$mean)/sqrt(nrow(.))) %>% mutate(dataset = "Global")
#mesophotic sources
sintBaMeans = sintBaSumm %>% filter(pop.i != pop.j, depth.j == "Mesophotic") %>% summarise(mean = mean(mean), sd = sd(.$mean), se = sd(.$mean)/sqrt(nrow(.))) %>% mutate(dataset = "Mesophotic Source") %>% bind_rows(sintBaMeans, .)
#shallow sources
sintBaMeans = sintBaSumm %>% filter(pop.i != pop.j, depth.j == "Shallow") %>% summarise(mean = mean(mean), sd = sd(.$mean), se = sd(.$mean)/sqrt(nrow(.))) %>% mutate(dataset = "Shallow Source") %>% bind_rows(sintBaMeans, .)
#mesophotic sinks
sintBaMeans = sintBaSumm %>% filter(pop.i != pop.j, depth.i == "Mesophotic") %>% summarise(mean = mean(mean), sd = sd(.$mean), se = sd(.$mean)/sqrt(nrow(.))) %>% mutate(dataset = "Mesophotic Sink") %>% bind_rows(sintBaMeans, .)
#shallow sinks
sintBaMeans = sintBaSumm %>% filter(pop.i != pop.j, depth.i == "Shallow") %>% summarise(mean = mean(mean), sd = sd(.$mean), se = sd(.$mean)/sqrt(nrow(.))) %>% mutate(dataset = "Shallow Sink") %>% bind_rows(sintBaMeans, .)
#mesophotic -> shallow
sintBaMeans = sintBaSumm %>% filter(pop.i != pop.j, depth.j == "Mesophotic", depth.i == "Shallow") %>% summarise(mean = mean(mean), sd = sd(.$mean), se = sd(.$mean)/sqrt(nrow(.))) %>% mutate(dataset = "Mesophotic -> Shallow") %>% bind_rows(sintBaMeans, .)
#mesophotic -> mesophotic
sintBaMeans = sintBaSumm %>% filter(pop.i != pop.j, depth.j == "Mesophotic", depth.i == "Mesophotic") %>% summarise(mean = mean(mean), sd = sd(.$mean), se = sd(.$mean)/sqrt(nrow(.))) %>% mutate(dataset = "Mesophotic -> Mesophotic") %>% bind_rows(sintBaMeans, .)
#shallow -> mesophotic
sintBaMeans = sintBaSumm %>% filter(pop.i != pop.j, depth.j == "Shallow", depth.i == "Mesophotic") %>% summarise(mean = mean(.$mean), sd = sd(.$mean), se = sd(.$mean)/sqrt(nrow(.))) %>% mutate(dataset = "Shallow -> Mesophotic") %>% bind_rows(sintBaMeans, .)
#shallow -> shallow
sintBaMeans = sintBaSumm %>% filter(pop.i != pop.j, depth.j == "Shallow", depth.i == "Shallow") %>% summarise(mean = round(mean(.$mean), 5), sd = round(sd(.$mean), 5), se = round(sd(.$mean)/sqrt(nrow(.)), 3)) %>% mutate(dataset = paste("Shallow -> Shallow")) %>% bind_rows(sintBaMeans, .) %>% relocate(dataset, .before = mean) %>% as.data.frame()
sintBaMeans[,c(2:4)] = sintBaMeans[,c(2:4)] %>% round(4)
sintBaMeans
## dataset mean sd se
## 1 Global 0.0422 0.0450 0.0069
## 2 Mesophotic Source 0.0358 0.0425 0.0078
## 3 Shallow Source 0.0584 0.0488 0.0141
## 4 Mesophotic Sink 0.0443 0.0445 0.0081
## 5 Shallow Sink 0.0371 0.0478 0.0138
## 6 Mesophotic -> Shallow 0.0282 0.0331 0.0105
## 7 Mesophotic -> Mesophotic 0.0396 0.0468 0.0105
## 8 Shallow -> Mesophotic 0.0538 0.0400 0.0126
## 9 Shallow -> Shallow 0.0815 0.1025 0.0730
sintBaMeansPub = sintBaMeans %>%
flextable() %>%
flextable::compose(part = "header", j = "dataset", value = as_paragraph("Dataset")) %>%
flextable::compose(part = "header", j = "mean", value = as_paragraph(as_i("m"))) %>%
flextable::compose(part = "header", j = "sd", value = as_paragraph("SD")) %>%
flextable::compose(part = "header", j = "se", value = as_paragraph("SEM")) %>%
flextable::font(fontname = "Times New Roman", part = "all") %>%
flextable::fontsize(size = 10, part = "all") %>%
flextable::bold(part = "header") %>%
flextable::align(align = "left", part = "all") %>%
flextable::autofit()
table = read_docx()
table = body_add_flextable(table, value = sintBaMeansPub)
print(table, target = "../tables/table2.docx")
sintBaMeansPub
Dataset | m | SD | SEM |
|---|---|---|---|
Global | 0.0422 | 0.0450 | 0.0069 |
Mesophotic Source | 0.0358 | 0.0425 | 0.0078 |
Shallow Source | 0.0584 | 0.0488 | 0.0141 |
Mesophotic Sink | 0.0443 | 0.0445 | 0.0081 |
Shallow Sink | 0.0371 | 0.0478 | 0.0138 |
Mesophotic -> Shallow | 0.0282 | 0.0331 | 0.0105 |
Mesophotic -> Mesophotic | 0.0396 | 0.0468 | 0.0105 |
Shallow -> Mesophotic | 0.0538 | 0.0400 | 0.0126 |
Shallow -> Shallow | 0.0815 | 0.1025 | 0.0730 |
sintBaSumm$mean = sprintf('%.3f', sintBaSumm$mean)
sintBaSumm$mean2 = sintBaSumm$mean
sintBaSumm$hpdLow = sprintf('%.3f', sintBaSumm$hpdLow)
sintBaSumm$hpdHigh = sprintf('%.3f', sintBaSumm$hpdHigh)
sintBaLabs = tibble(pop.i = unique(sintBaSumm$pop.i), pop.j = unique(sintBaSumm$pop.j))
sintMigrateA = ggplot(data = sintBaSumm, aes(pop.i, pop.j))+
geom_tile(data = subset(sintBaSumm, subset = sintBaSumm$mean2>0.65), fill = "gray35", color = "white") +
geom_segment(data = sintBaSumm, aes(x = 0.4755, xend = -0.42, y = pop.j, yend = pop.j, color = pop.j), size = 14) +
geom_segment(data = sintBaSumm, aes(x = pop.i, xend = pop.i, y = 0.45, yend = -0.425, color = pop.i), size = 32) +
scale_color_manual(values = gomPal[c(1:2, 1:5)], guide = NULL) +
guides(fill = guide_colorbar(ticks.colour = "black", barwidth = 1, barheight = 10, frame.colour = "black")) +
# new_scale("fill") +
geom_tile(data = subset(sintBaSumm, subset = sintBaSumm$mean<0.65), aes(fill = as.numeric(as.character(mean))), color = "white") +
scale_fill_gradientn(colours = paletteer_c("viridis::mako", n = 10, direction = -1)[c(1:7)], limit = c(0,0.23), space = "Lab", name = expression(paste(italic("m"))), na.value = "transparent", guide = "colourbar", values = c(0, 0.05, 0.1, 0.15, 0.2,0.5,0.75,1)) +
# scale_fill_gradientn(colours = paletteer_d("khroma::smoothrainbow"), limit = c(0,0.27), space = "Lab", name = expression(paste(italic("m"))), na.value = "transparent", guide = "colourbar", values = c(0, 0.05, 0.1, 0.15, 0.2,0.5,0.75,1)) +
geom_text(data = sintBaSumm, aes(x = pop.i, y = pop.j, label = paste(mean, "\n", sep = "")), color = ifelse(sintBaSumm$mean > 0.6, "white", "gray5"), fontface = ifelse(as.numeric(sintBaSumm$hpdLow)>0, "bold", "plain"), size = ifelse(as.numeric(sintBaSumm$hpdLow)>0, 4.75, 4)) +
geom_text(data = sintBaSumm, aes(x = pop.i, y = pop.j, label = paste("\n(",hpdLow,"–",hpdHigh, ")", sep = "")), color = ifelse(sintBaSumm$mean > 0.6, "white", "gray5"), size = 3.25) +
geom_text(data = (sintBaLabs %>% filter(pop.j %in% c("Tortugas Bank\nMesophotic", "Tortugas Bank\nShallow", "Riley's Hump\nMesophotic", "Riley's Hump\nShallow"))), x = -.02, aes(y = pop.j, label = pop.j), size = 3.75, color = "#FFFFFF", family = "sans") +
geom_text(data = (sintBaLabs %>% filter(!pop.j %in% c("Tortugas Bank\nMesophotic", "Tortugas Bank\nShallow", "Riley's Hump\nMesophotic", "Riley's Hump\nShallow"))), x = -.02, aes(y = pop.j, label = pop.j), size = 3.75, color = "#000000", family = "sans") +
geom_text(data = (sintBaLabs %>% filter(pop.i %in% c("Tortugas Bank\nMesophotic", "Tortugas Bank\nShallow", "Riley's Hump\nMesophotic", "Riley's Hump\nShallow"))), y = -.01, aes(x = pop.i, label = pop.i), size = 3.75, color = "#FFFFFF", family = "sans") +
geom_text(data = (sintBaLabs %>% filter(!pop.i %in% c("Tortugas Bank\nMesophotic", "Tortugas Bank\nShallow", "Riley's Hump\nMesophotic", "Riley's Hump\nShallow"))), y = -.01, aes(x = pop.i, label = pop.i), size = 3.75, color = "#000000", family = "sans") +
labs(x = "Sink", y = "Source") +
scale_y_discrete(limits = rev(levels(sintBaSumm$pop.i))[c(1:8)], position = "left") +
coord_cartesian(xlim = c(1, 8), ylim = c(1, 8), clip = "off") +
theme_minimal()
sintMigrate = sintMigrateA + theme(
axis.text.x = element_text(vjust = 1, size = 12, hjust = 0.5, color = NA),
axis.text.y = element_text(size = 10, color = NA),
axis.title.x = element_text(size = 14, hjust = 0.425),
axis.title.y = element_text(size = 14, hjust = 0.425),
panel.grid.major = element_blank(),
axis.ticks = element_blank(),
# legend.position = c(1.055, 0.5),
legend.direction = "vertical",
legend.title = element_text(size = 12, face = "bold")
)
sintMigrate
migratePlots = mcavMigrate + inset_element(mcavImg, -0.02, 0.85, 0.17, 1.0, align_to = "full", ignore_tag = TRUE) +
sintMigrate + inset_element(sintImg, -0.02, 0.85, 0.17, 1.0, align_to = "full", ignore_tag = TRUE) +
plot_annotation(tag_levels = c("A")) +
plot_layout(ncol = 1, guides = "collect") &
theme(plot.tag = element_text(size = 18),
legend.box.margin = unit(units = "cm",c(0,0,0,-2)),
legend.key = element_blank(),
legend.background = element_blank())
ggsave("../figures/figure6.png", plot = migratePlots, height = 9.5, width = 9.5, units = "in", dpi = 300)
gomTrees = mcavStructure + inset_element(mcavImg, 0.02, 0.85, 0.17, 1.0, align_to = "full", ignore_tag = TRUE) +
ofavStructure + inset_element(ofavImg, 0.02, 0.85, 0.17, 1.0, align_to = "full", ignore_tag = TRUE) +
sintStructure + inset_element(sintImg, 0.02, 0.85, 0.17, 1.0, align_to = "full", ignore_tag = TRUE) +
xestoStructure + inset_element(xestoImg, 0.02, 0.85, 0.17, 1.0, align_to = "full", ignore_tag = TRUE) +
plot_annotation(tag_levels = c("A")) +
theme(plot.tag = element_text(size = 18),
legend.spacing = unit(-5, "pt"),
legend.key = element_blank(),
legend.background = element_blank())
gomTrees
ggsave("../figures/extras/nwgom_trees.png", plot = gomTrees, dpi = 300, height = 10, width = 12, units = "in")
save.image("nwgom.RData")